diff --git "a/trainer_state.json" "b/trainer_state.json" deleted file mode 100644--- "a/trainer_state.json" +++ /dev/null @@ -1,27774 +0,0 @@ -{ - "best_metric": null, - "best_model_checkpoint": null, - "epoch": 2.9994323557237466, - "eval_steps": 500, - "global_step": 3963, - "is_hyper_param_search": false, - "is_local_process_zero": true, - "is_world_process_zero": true, - "log_history": [ - { - "epoch": 0.0007568590350047304, - "grad_norm": 123.05032348632812, - "learning_rate": 0.0, - "loss": 2.0667, - "step": 1 - }, - { - "epoch": 0.0015137180700094607, - "grad_norm": 59.42024612426758, - "learning_rate": 3.5436764027111585e-06, - "loss": 1.9891, - "step": 2 - }, - { - "epoch": 0.002270577105014191, - "grad_norm": 74.41653442382812, - "learning_rate": 5.61659421298763e-06, - "loss": 1.9745, - "step": 3 - }, - { - "epoch": 0.0030274361400189215, - "grad_norm": 65.0828857421875, - "learning_rate": 7.087352805422317e-06, - "loss": 1.9442, - "step": 4 - }, - { - "epoch": 0.003784295175023652, - "grad_norm": 77.46288299560547, - "learning_rate": 8.228161798644422e-06, - "loss": 1.911, - "step": 5 - }, - { - "epoch": 0.004541154210028382, - "grad_norm": 140.7876434326172, - "learning_rate": 9.160270615698787e-06, - "loss": 1.9, - "step": 6 - }, - { - "epoch": 0.005298013245033113, - "grad_norm": 39.34813690185547, - "learning_rate": 9.948357391330555e-06, - "loss": 1.8421, - "step": 7 - }, - { - "epoch": 0.006054872280037843, - "grad_norm": 42.30311584472656, - "learning_rate": 1.0631029208133474e-05, - "loss": 1.8634, - "step": 8 - }, - { - "epoch": 0.006811731315042573, - "grad_norm": 56.799530029296875, - "learning_rate": 1.123318842597526e-05, - "loss": 1.8391, - "step": 9 - }, - { - "epoch": 0.007568590350047304, - "grad_norm": 74.55519104003906, - "learning_rate": 1.1771838201355582e-05, - "loss": 1.7531, - "step": 10 - }, - { - "epoch": 0.008325449385052034, - "grad_norm": 31.099952697753906, - "learning_rate": 1.2259106193757859e-05, - "loss": 1.7675, - "step": 11 - }, - { - "epoch": 0.009082308420056764, - "grad_norm": 49.344966888427734, - "learning_rate": 1.2703947018409945e-05, - "loss": 1.7896, - "step": 12 - }, - { - "epoch": 0.009839167455061495, - "grad_norm": 48.00835418701172, - "learning_rate": 1.311316090883172e-05, - "loss": 1.8585, - "step": 13 - }, - { - "epoch": 0.010596026490066225, - "grad_norm": 38.080078125, - "learning_rate": 1.3492033794041713e-05, - "loss": 1.7329, - "step": 14 - }, - { - "epoch": 0.011352885525070956, - "grad_norm": 45.796382904052734, - "learning_rate": 1.384475601163205e-05, - "loss": 1.8033, - "step": 15 - }, - { - "epoch": 0.012109744560075686, - "grad_norm": 35.87776184082031, - "learning_rate": 1.4174705610844634e-05, - "loss": 1.7784, - "step": 16 - }, - { - "epoch": 0.012866603595080416, - "grad_norm": 25.678325653076172, - "learning_rate": 1.4484645617497535e-05, - "loss": 1.7741, - "step": 17 - }, - { - "epoch": 0.013623462630085147, - "grad_norm": 28.66301918029785, - "learning_rate": 1.4776864828686414e-05, - "loss": 1.7957, - "step": 18 - }, - { - "epoch": 0.014380321665089877, - "grad_norm": 37.723976135253906, - "learning_rate": 1.505328048981752e-05, - "loss": 1.695, - "step": 19 - }, - { - "epoch": 0.015137180700094607, - "grad_norm": 36.247718811035156, - "learning_rate": 1.5315514604066738e-05, - "loss": 1.7189, - "step": 20 - }, - { - "epoch": 0.015894039735099338, - "grad_norm": 23.032033920288086, - "learning_rate": 1.5564951604318184e-05, - "loss": 1.6817, - "step": 21 - }, - { - "epoch": 0.016650898770104068, - 
"grad_norm": 28.1435489654541, - "learning_rate": 1.580278259646902e-05, - "loss": 1.7185, - "step": 22 - }, - { - "epoch": 0.0174077578051088, - "grad_norm": 75.195068359375, - "learning_rate": 1.603003975988117e-05, - "loss": 1.7109, - "step": 23 - }, - { - "epoch": 0.01816461684011353, - "grad_norm": 30.104032516479492, - "learning_rate": 1.6247623421121105e-05, - "loss": 1.6333, - "step": 24 - }, - { - "epoch": 0.01892147587511826, - "grad_norm": 24.25992774963379, - "learning_rate": 1.6456323597288844e-05, - "loss": 1.6416, - "step": 25 - }, - { - "epoch": 0.01967833491012299, - "grad_norm": 28.712949752807617, - "learning_rate": 1.6656837311542876e-05, - "loss": 1.6712, - "step": 26 - }, - { - "epoch": 0.02043519394512772, - "grad_norm": 26.72446060180664, - "learning_rate": 1.6849782638962885e-05, - "loss": 1.5939, - "step": 27 - }, - { - "epoch": 0.02119205298013245, - "grad_norm": 20.644784927368164, - "learning_rate": 1.7035710196752873e-05, - "loss": 1.6718, - "step": 28 - }, - { - "epoch": 0.02194891201513718, - "grad_norm": 24.643821716308594, - "learning_rate": 1.7215112617252848e-05, - "loss": 1.6778, - "step": 29 - }, - { - "epoch": 0.02270577105014191, - "grad_norm": 22.256315231323242, - "learning_rate": 1.738843241434321e-05, - "loss": 1.6233, - "step": 30 - }, - { - "epoch": 0.02346263008514664, - "grad_norm": 24.24241065979004, - "learning_rate": 1.7556068559516658e-05, - "loss": 1.6744, - "step": 31 - }, - { - "epoch": 0.024219489120151372, - "grad_norm": 37.310150146484375, - "learning_rate": 1.7718382013555794e-05, - "loss": 1.6556, - "step": 32 - }, - { - "epoch": 0.024976348155156102, - "grad_norm": 48.23684310913086, - "learning_rate": 1.7875700406745488e-05, - "loss": 1.6575, - "step": 33 - }, - { - "epoch": 0.025733207190160833, - "grad_norm": 24.166748046875, - "learning_rate": 1.8028322020208693e-05, - "loss": 1.6946, - "step": 34 - }, - { - "epoch": 0.026490066225165563, - "grad_norm": 29.902538299560547, - "learning_rate": 1.817651918997498e-05, - "loss": 1.6453, - "step": 35 - }, - { - "epoch": 0.027246925260170293, - "grad_norm": 27.081722259521484, - "learning_rate": 1.8320541231397574e-05, - "loss": 1.5933, - "step": 36 - }, - { - "epoch": 0.028003784295175024, - "grad_norm": 28.04783058166504, - "learning_rate": 1.8460616962803535e-05, - "loss": 1.676, - "step": 37 - }, - { - "epoch": 0.028760643330179754, - "grad_norm": 49.34148406982422, - "learning_rate": 1.859695689252868e-05, - "loss": 1.7431, - "step": 38 - }, - { - "epoch": 0.029517502365184484, - "grad_norm": 32.92803192138672, - "learning_rate": 1.872975512181935e-05, - "loss": 1.7004, - "step": 39 - }, - { - "epoch": 0.030274361400189215, - "grad_norm": 28.5103816986084, - "learning_rate": 1.8859191006777896e-05, - "loss": 1.646, - "step": 40 - }, - { - "epoch": 0.031031220435193945, - "grad_norm": 16.540956497192383, - "learning_rate": 1.8985430615062968e-05, - "loss": 1.6601, - "step": 41 - }, - { - "epoch": 0.031788079470198675, - "grad_norm": 26.588886260986328, - "learning_rate": 1.9108628007029345e-05, - "loss": 1.6273, - "step": 42 - }, - { - "epoch": 0.03254493850520341, - "grad_norm": 42.97163009643555, - "learning_rate": 1.9228926366101076e-05, - "loss": 1.5573, - "step": 43 - }, - { - "epoch": 0.033301797540208136, - "grad_norm": 40.343658447265625, - "learning_rate": 1.9346458999180177e-05, - "loss": 1.5821, - "step": 44 - }, - { - "epoch": 0.03405865657521287, - "grad_norm": 41.81525421142578, - "learning_rate": 1.946135022461968e-05, - "loss": 1.5927, - "step": 45 - }, - { - 
"epoch": 0.0348155156102176, - "grad_norm": 24.463436126708984, - "learning_rate": 1.9573716162592327e-05, - "loss": 1.6377, - "step": 46 - }, - { - "epoch": 0.03557237464522233, - "grad_norm": 21.16547203063965, - "learning_rate": 1.9683665440452515e-05, - "loss": 1.6151, - "step": 47 - }, - { - "epoch": 0.03632923368022706, - "grad_norm": 75.09718322753906, - "learning_rate": 1.9791299823832263e-05, - "loss": 1.6261, - "step": 48 - }, - { - "epoch": 0.03708609271523179, - "grad_norm": 30.04339027404785, - "learning_rate": 1.989671478266111e-05, - "loss": 1.6229, - "step": 49 - }, - { - "epoch": 0.03784295175023652, - "grad_norm": 100.00825500488281, - "learning_rate": 2e-05, - "loss": 1.6116, - "step": 50 - }, - { - "epoch": 0.03859981078524125, - "grad_norm": 28.68238639831543, - "learning_rate": 1.9999996777398546e-05, - "loss": 1.6644, - "step": 51 - }, - { - "epoch": 0.03935666982024598, - "grad_norm": 68.90675354003906, - "learning_rate": 1.9999987109596254e-05, - "loss": 1.6159, - "step": 52 - }, - { - "epoch": 0.04011352885525071, - "grad_norm": 81.32110595703125, - "learning_rate": 1.999997099659936e-05, - "loss": 1.7117, - "step": 53 - }, - { - "epoch": 0.04087038789025544, - "grad_norm": 171.38938903808594, - "learning_rate": 1.999994843841825e-05, - "loss": 1.5922, - "step": 54 - }, - { - "epoch": 0.041627246925260174, - "grad_norm": 43.32768249511719, - "learning_rate": 1.9999919435067465e-05, - "loss": 1.6245, - "step": 55 - }, - { - "epoch": 0.0423841059602649, - "grad_norm": 74.8743896484375, - "learning_rate": 1.9999883986565696e-05, - "loss": 1.6613, - "step": 56 - }, - { - "epoch": 0.043140964995269634, - "grad_norm": 18.4515438079834, - "learning_rate": 1.9999842092935797e-05, - "loss": 1.6048, - "step": 57 - }, - { - "epoch": 0.04389782403027436, - "grad_norm": 22.151155471801758, - "learning_rate": 1.999979375420477e-05, - "loss": 1.632, - "step": 58 - }, - { - "epoch": 0.044654683065279095, - "grad_norm": 19.156835556030273, - "learning_rate": 1.9999738970403775e-05, - "loss": 1.6189, - "step": 59 - }, - { - "epoch": 0.04541154210028382, - "grad_norm": 18.256269454956055, - "learning_rate": 1.999967774156812e-05, - "loss": 1.6306, - "step": 60 - }, - { - "epoch": 0.046168401135288556, - "grad_norm": 26.946420669555664, - "learning_rate": 1.999961006773728e-05, - "loss": 1.6411, - "step": 61 - }, - { - "epoch": 0.04692526017029328, - "grad_norm": 23.500892639160156, - "learning_rate": 1.999953594895487e-05, - "loss": 1.5876, - "step": 62 - }, - { - "epoch": 0.04768211920529802, - "grad_norm": 12.633379936218262, - "learning_rate": 1.999945538526867e-05, - "loss": 1.601, - "step": 63 - }, - { - "epoch": 0.048438978240302744, - "grad_norm": 15.04751205444336, - "learning_rate": 1.999936837673061e-05, - "loss": 1.6078, - "step": 64 - }, - { - "epoch": 0.04919583727530748, - "grad_norm": 11.406414985656738, - "learning_rate": 1.999927492339677e-05, - "loss": 1.5959, - "step": 65 - }, - { - "epoch": 0.049952696310312204, - "grad_norm": 11.040087699890137, - "learning_rate": 1.9999175025327395e-05, - "loss": 1.6025, - "step": 66 - }, - { - "epoch": 0.05070955534531694, - "grad_norm": 10.689179420471191, - "learning_rate": 1.999906868258687e-05, - "loss": 1.5797, - "step": 67 - }, - { - "epoch": 0.051466414380321665, - "grad_norm": 8.19336986541748, - "learning_rate": 1.9998955895243748e-05, - "loss": 1.5564, - "step": 68 - }, - { - "epoch": 0.0522232734153264, - "grad_norm": 13.38058853149414, - "learning_rate": 1.9998836663370726e-05, - "loss": 1.5584, - "step": 69 - }, - 
{ - "epoch": 0.052980132450331126, - "grad_norm": 7.053563594818115, - "learning_rate": 1.9998710987044664e-05, - "loss": 1.5005, - "step": 70 - }, - { - "epoch": 0.05373699148533586, - "grad_norm": 6.7353105545043945, - "learning_rate": 1.9998578866346564e-05, - "loss": 1.5306, - "step": 71 - }, - { - "epoch": 0.054493850520340587, - "grad_norm": 5.975197792053223, - "learning_rate": 1.9998440301361598e-05, - "loss": 1.5885, - "step": 72 - }, - { - "epoch": 0.05525070955534532, - "grad_norm": 6.6494011878967285, - "learning_rate": 1.9998295292179073e-05, - "loss": 1.532, - "step": 73 - }, - { - "epoch": 0.05600756859035005, - "grad_norm": 5.434142589569092, - "learning_rate": 1.9998143838892468e-05, - "loss": 1.5764, - "step": 74 - }, - { - "epoch": 0.05676442762535478, - "grad_norm": 6.099053859710693, - "learning_rate": 1.99979859415994e-05, - "loss": 1.5187, - "step": 75 - }, - { - "epoch": 0.05752128666035951, - "grad_norm": 5.7470855712890625, - "learning_rate": 1.999782160040166e-05, - "loss": 1.5377, - "step": 76 - }, - { - "epoch": 0.05827814569536424, - "grad_norm": 5.577144145965576, - "learning_rate": 1.9997650815405167e-05, - "loss": 1.4817, - "step": 77 - }, - { - "epoch": 0.05903500473036897, - "grad_norm": 5.294313907623291, - "learning_rate": 1.999747358672001e-05, - "loss": 1.4812, - "step": 78 - }, - { - "epoch": 0.0597918637653737, - "grad_norm": 5.860252380371094, - "learning_rate": 1.9997289914460428e-05, - "loss": 1.524, - "step": 79 - }, - { - "epoch": 0.06054872280037843, - "grad_norm": 9.259795188903809, - "learning_rate": 1.9997099798744815e-05, - "loss": 1.4869, - "step": 80 - }, - { - "epoch": 0.06130558183538316, - "grad_norm": 5.9855852127075195, - "learning_rate": 1.999690323969571e-05, - "loss": 1.5187, - "step": 81 - }, - { - "epoch": 0.06206244087038789, - "grad_norm": 6.138685703277588, - "learning_rate": 1.9996700237439823e-05, - "loss": 1.5468, - "step": 82 - }, - { - "epoch": 0.06281929990539262, - "grad_norm": 6.596303939819336, - "learning_rate": 1.9996490792107997e-05, - "loss": 1.4899, - "step": 83 - }, - { - "epoch": 0.06357615894039735, - "grad_norm": 5.794029712677002, - "learning_rate": 1.9996274903835247e-05, - "loss": 1.4633, - "step": 84 - }, - { - "epoch": 0.06433301797540208, - "grad_norm": 5.815922260284424, - "learning_rate": 1.9996052572760723e-05, - "loss": 1.4968, - "step": 85 - }, - { - "epoch": 0.06508987701040682, - "grad_norm": 11.239161491394043, - "learning_rate": 1.9995823799027737e-05, - "loss": 1.4604, - "step": 86 - }, - { - "epoch": 0.06584673604541154, - "grad_norm": 5.367482662200928, - "learning_rate": 1.9995588582783753e-05, - "loss": 1.518, - "step": 87 - }, - { - "epoch": 0.06660359508041627, - "grad_norm": 19.351980209350586, - "learning_rate": 1.9995346924180394e-05, - "loss": 1.5267, - "step": 88 - }, - { - "epoch": 0.067360454115421, - "grad_norm": 6.7129316329956055, - "learning_rate": 1.999509882337342e-05, - "loss": 1.4639, - "step": 89 - }, - { - "epoch": 0.06811731315042574, - "grad_norm": 6.777989387512207, - "learning_rate": 1.999484428052276e-05, - "loss": 1.5127, - "step": 90 - }, - { - "epoch": 0.06887417218543046, - "grad_norm": 14.996123313903809, - "learning_rate": 1.9994583295792487e-05, - "loss": 1.5277, - "step": 91 - }, - { - "epoch": 0.0696310312204352, - "grad_norm": 6.6374311447143555, - "learning_rate": 1.9994315869350826e-05, - "loss": 1.4834, - "step": 92 - }, - { - "epoch": 0.07038789025543993, - "grad_norm": 11.19003963470459, - "learning_rate": 1.9994042001370154e-05, - "loss": 1.5084, - 
"step": 93 - }, - { - "epoch": 0.07114474929044466, - "grad_norm": 6.2547407150268555, - "learning_rate": 1.9993761692027007e-05, - "loss": 1.485, - "step": 94 - }, - { - "epoch": 0.07190160832544938, - "grad_norm": 6.645302772521973, - "learning_rate": 1.9993474941502067e-05, - "loss": 1.463, - "step": 95 - }, - { - "epoch": 0.07265846736045412, - "grad_norm": 7.073038578033447, - "learning_rate": 1.9993181749980168e-05, - "loss": 1.509, - "step": 96 - }, - { - "epoch": 0.07341532639545885, - "grad_norm": 6.401993274688721, - "learning_rate": 1.99928821176503e-05, - "loss": 1.4958, - "step": 97 - }, - { - "epoch": 0.07417218543046358, - "grad_norm": 6.126581192016602, - "learning_rate": 1.9992576044705596e-05, - "loss": 1.4449, - "step": 98 - }, - { - "epoch": 0.0749290444654683, - "grad_norm": 8.766273498535156, - "learning_rate": 1.9992263531343348e-05, - "loss": 1.5218, - "step": 99 - }, - { - "epoch": 0.07568590350047304, - "grad_norm": 5.65410852432251, - "learning_rate": 1.9991944577764996e-05, - "loss": 1.5205, - "step": 100 - }, - { - "epoch": 0.07644276253547777, - "grad_norm": 5.447603702545166, - "learning_rate": 1.9991619184176136e-05, - "loss": 1.4651, - "step": 101 - }, - { - "epoch": 0.0771996215704825, - "grad_norm": 5.317190647125244, - "learning_rate": 1.9991287350786512e-05, - "loss": 1.5059, - "step": 102 - }, - { - "epoch": 0.07795648060548722, - "grad_norm": 5.233520984649658, - "learning_rate": 1.9990949077810015e-05, - "loss": 1.4556, - "step": 103 - }, - { - "epoch": 0.07871333964049196, - "grad_norm": 4.955499649047852, - "learning_rate": 1.9990604365464693e-05, - "loss": 1.4236, - "step": 104 - }, - { - "epoch": 0.07947019867549669, - "grad_norm": 9.175353050231934, - "learning_rate": 1.9990253213972742e-05, - "loss": 1.4482, - "step": 105 - }, - { - "epoch": 0.08022705771050143, - "grad_norm": 5.2216057777404785, - "learning_rate": 1.998989562356051e-05, - "loss": 1.4902, - "step": 106 - }, - { - "epoch": 0.08098391674550615, - "grad_norm": 4.395474910736084, - "learning_rate": 1.9989531594458487e-05, - "loss": 1.4419, - "step": 107 - }, - { - "epoch": 0.08174077578051088, - "grad_norm": 4.641335487365723, - "learning_rate": 1.998916112690133e-05, - "loss": 1.4715, - "step": 108 - }, - { - "epoch": 0.08249763481551561, - "grad_norm": 5.315745830535889, - "learning_rate": 1.9988784221127834e-05, - "loss": 1.4742, - "step": 109 - }, - { - "epoch": 0.08325449385052035, - "grad_norm": 5.404274940490723, - "learning_rate": 1.998840087738095e-05, - "loss": 1.4579, - "step": 110 - }, - { - "epoch": 0.08401135288552507, - "grad_norm": 4.288702011108398, - "learning_rate": 1.9988011095907768e-05, - "loss": 1.49, - "step": 111 - }, - { - "epoch": 0.0847682119205298, - "grad_norm": 4.434887409210205, - "learning_rate": 1.9987614876959536e-05, - "loss": 1.4946, - "step": 112 - }, - { - "epoch": 0.08552507095553454, - "grad_norm": 5.428564071655273, - "learning_rate": 1.9987212220791657e-05, - "loss": 1.3817, - "step": 113 - }, - { - "epoch": 0.08628192999053927, - "grad_norm": 3.9893720149993896, - "learning_rate": 1.9986803127663672e-05, - "loss": 1.4428, - "step": 114 - }, - { - "epoch": 0.08703878902554399, - "grad_norm": 4.35543966293335, - "learning_rate": 1.998638759783928e-05, - "loss": 1.3801, - "step": 115 - }, - { - "epoch": 0.08779564806054872, - "grad_norm": 4.2772722244262695, - "learning_rate": 1.9985965631586318e-05, - "loss": 1.3975, - "step": 116 - }, - { - "epoch": 0.08855250709555346, - "grad_norm": 4.769036769866943, - "learning_rate": 
1.9985537229176787e-05, - "loss": 1.4413, - "step": 117 - }, - { - "epoch": 0.08930936613055819, - "grad_norm": 4.7659759521484375, - "learning_rate": 1.9985102390886825e-05, - "loss": 1.4665, - "step": 118 - }, - { - "epoch": 0.09006622516556291, - "grad_norm": 5.218923091888428, - "learning_rate": 1.9984661116996723e-05, - "loss": 1.4544, - "step": 119 - }, - { - "epoch": 0.09082308420056764, - "grad_norm": 4.296699047088623, - "learning_rate": 1.9984213407790924e-05, - "loss": 1.3944, - "step": 120 - }, - { - "epoch": 0.09157994323557238, - "grad_norm": 3.866936683654785, - "learning_rate": 1.9983759263558003e-05, - "loss": 1.4273, - "step": 121 - }, - { - "epoch": 0.09233680227057711, - "grad_norm": 4.711172103881836, - "learning_rate": 1.99832986845907e-05, - "loss": 1.4208, - "step": 122 - }, - { - "epoch": 0.09309366130558183, - "grad_norm": 4.553902626037598, - "learning_rate": 1.9982831671185905e-05, - "loss": 1.525, - "step": 123 - }, - { - "epoch": 0.09385052034058657, - "grad_norm": 4.0878801345825195, - "learning_rate": 1.9982358223644635e-05, - "loss": 1.4621, - "step": 124 - }, - { - "epoch": 0.0946073793755913, - "grad_norm": 4.239192485809326, - "learning_rate": 1.9981878342272074e-05, - "loss": 1.4151, - "step": 125 - }, - { - "epoch": 0.09536423841059603, - "grad_norm": 3.9742391109466553, - "learning_rate": 1.9981392027377548e-05, - "loss": 1.4588, - "step": 126 - }, - { - "epoch": 0.09612109744560075, - "grad_norm": 4.459286212921143, - "learning_rate": 1.9980899279274523e-05, - "loss": 1.414, - "step": 127 - }, - { - "epoch": 0.09687795648060549, - "grad_norm": 4.164027214050293, - "learning_rate": 1.9980400098280622e-05, - "loss": 1.4572, - "step": 128 - }, - { - "epoch": 0.09763481551561022, - "grad_norm": 4.778876304626465, - "learning_rate": 1.9979894484717604e-05, - "loss": 1.4279, - "step": 129 - }, - { - "epoch": 0.09839167455061495, - "grad_norm": 4.639044761657715, - "learning_rate": 1.9979382438911383e-05, - "loss": 1.4343, - "step": 130 - }, - { - "epoch": 0.09914853358561967, - "grad_norm": 4.090446949005127, - "learning_rate": 1.9978863961192018e-05, - "loss": 1.4802, - "step": 131 - }, - { - "epoch": 0.09990539262062441, - "grad_norm": 4.360771656036377, - "learning_rate": 1.9978339051893702e-05, - "loss": 1.4552, - "step": 132 - }, - { - "epoch": 0.10066225165562914, - "grad_norm": 3.851464033126831, - "learning_rate": 1.9977807711354796e-05, - "loss": 1.3779, - "step": 133 - }, - { - "epoch": 0.10141911069063388, - "grad_norm": 4.016122341156006, - "learning_rate": 1.997726993991779e-05, - "loss": 1.4313, - "step": 134 - }, - { - "epoch": 0.1021759697256386, - "grad_norm": 4.009467124938965, - "learning_rate": 1.997672573792932e-05, - "loss": 1.491, - "step": 135 - }, - { - "epoch": 0.10293282876064333, - "grad_norm": 3.8596322536468506, - "learning_rate": 1.997617510574018e-05, - "loss": 1.4724, - "step": 136 - }, - { - "epoch": 0.10368968779564806, - "grad_norm": 3.6011574268341064, - "learning_rate": 1.9975618043705282e-05, - "loss": 1.3931, - "step": 137 - }, - { - "epoch": 0.1044465468306528, - "grad_norm": 4.025736331939697, - "learning_rate": 1.997505455218371e-05, - "loss": 1.4269, - "step": 138 - }, - { - "epoch": 0.10520340586565752, - "grad_norm": 3.760977268218994, - "learning_rate": 1.9974484631538685e-05, - "loss": 1.4311, - "step": 139 - }, - { - "epoch": 0.10596026490066225, - "grad_norm": 4.554644584655762, - "learning_rate": 1.9973908282137565e-05, - "loss": 1.4535, - "step": 140 - }, - { - "epoch": 0.10671712393566699, - "grad_norm": 
4.12142276763916, - "learning_rate": 1.9973325504351856e-05, - "loss": 1.4111, - "step": 141 - }, - { - "epoch": 0.10747398297067172, - "grad_norm": 3.9459025859832764, - "learning_rate": 1.9972736298557207e-05, - "loss": 1.4424, - "step": 142 - }, - { - "epoch": 0.10823084200567644, - "grad_norm": 3.65413236618042, - "learning_rate": 1.9972140665133412e-05, - "loss": 1.3589, - "step": 143 - }, - { - "epoch": 0.10898770104068117, - "grad_norm": 3.935250997543335, - "learning_rate": 1.997153860446441e-05, - "loss": 1.3985, - "step": 144 - }, - { - "epoch": 0.1097445600756859, - "grad_norm": 4.394814968109131, - "learning_rate": 1.9970930116938273e-05, - "loss": 1.4304, - "step": 145 - }, - { - "epoch": 0.11050141911069064, - "grad_norm": 3.6491141319274902, - "learning_rate": 1.997031520294723e-05, - "loss": 1.3928, - "step": 146 - }, - { - "epoch": 0.11125827814569536, - "grad_norm": 4.235386848449707, - "learning_rate": 1.9969693862887643e-05, - "loss": 1.4712, - "step": 147 - }, - { - "epoch": 0.1120151371807001, - "grad_norm": 4.189138412475586, - "learning_rate": 1.996906609716002e-05, - "loss": 1.3609, - "step": 148 - }, - { - "epoch": 0.11277199621570483, - "grad_norm": 3.729450225830078, - "learning_rate": 1.9968431906169005e-05, - "loss": 1.4229, - "step": 149 - }, - { - "epoch": 0.11352885525070956, - "grad_norm": 3.915863513946533, - "learning_rate": 1.996779129032339e-05, - "loss": 1.3628, - "step": 150 - }, - { - "epoch": 0.11428571428571428, - "grad_norm": 4.461569786071777, - "learning_rate": 1.9967144250036104e-05, - "loss": 1.4087, - "step": 151 - }, - { - "epoch": 0.11504257332071902, - "grad_norm": 4.412698745727539, - "learning_rate": 1.9966490785724223e-05, - "loss": 1.4392, - "step": 152 - }, - { - "epoch": 0.11579943235572375, - "grad_norm": 4.236743450164795, - "learning_rate": 1.9965830897808954e-05, - "loss": 1.4391, - "step": 153 - }, - { - "epoch": 0.11655629139072848, - "grad_norm": 4.672597408294678, - "learning_rate": 1.996516458671566e-05, - "loss": 1.3995, - "step": 154 - }, - { - "epoch": 0.1173131504257332, - "grad_norm": 5.059709072113037, - "learning_rate": 1.9964491852873833e-05, - "loss": 1.3566, - "step": 155 - }, - { - "epoch": 0.11807000946073794, - "grad_norm": 4.973750114440918, - "learning_rate": 1.99638126967171e-05, - "loss": 1.3993, - "step": 156 - }, - { - "epoch": 0.11882686849574267, - "grad_norm": 4.362597942352295, - "learning_rate": 1.996312711868324e-05, - "loss": 1.4254, - "step": 157 - }, - { - "epoch": 0.1195837275307474, - "grad_norm": 4.407685279846191, - "learning_rate": 1.9962435119214164e-05, - "loss": 1.3983, - "step": 158 - }, - { - "epoch": 0.12034058656575213, - "grad_norm": 4.614277362823486, - "learning_rate": 1.9961736698755928e-05, - "loss": 1.412, - "step": 159 - }, - { - "epoch": 0.12109744560075686, - "grad_norm": 4.18186092376709, - "learning_rate": 1.9961031857758718e-05, - "loss": 1.3653, - "step": 160 - }, - { - "epoch": 0.12185430463576159, - "grad_norm": 4.011139392852783, - "learning_rate": 1.9960320596676866e-05, - "loss": 1.4234, - "step": 161 - }, - { - "epoch": 0.12261116367076633, - "grad_norm": 4.428970813751221, - "learning_rate": 1.9959602915968842e-05, - "loss": 1.3899, - "step": 162 - }, - { - "epoch": 0.12336802270577105, - "grad_norm": 4.968282222747803, - "learning_rate": 1.995887881609725e-05, - "loss": 1.4235, - "step": 163 - }, - { - "epoch": 0.12412488174077578, - "grad_norm": 4.600246906280518, - "learning_rate": 1.9958148297528833e-05, - "loss": 1.3656, - "step": 164 - }, - { - "epoch": 
0.12488174077578051, - "grad_norm": 4.392306804656982, - "learning_rate": 1.9957411360734476e-05, - "loss": 1.3804, - "step": 165 - }, - { - "epoch": 0.12563859981078523, - "grad_norm": 4.066370964050293, - "learning_rate": 1.995666800618919e-05, - "loss": 1.4013, - "step": 166 - }, - { - "epoch": 0.12639545884578998, - "grad_norm": 3.9358901977539062, - "learning_rate": 1.995591823437214e-05, - "loss": 1.3887, - "step": 167 - }, - { - "epoch": 0.1271523178807947, - "grad_norm": 4.182436466217041, - "learning_rate": 1.9955162045766607e-05, - "loss": 1.4011, - "step": 168 - }, - { - "epoch": 0.12790917691579942, - "grad_norm": 3.8715391159057617, - "learning_rate": 1.9954399440860026e-05, - "loss": 1.3881, - "step": 169 - }, - { - "epoch": 0.12866603595080417, - "grad_norm": 4.34489107131958, - "learning_rate": 1.9953630420143958e-05, - "loss": 1.3679, - "step": 170 - }, - { - "epoch": 0.1294228949858089, - "grad_norm": 4.175931930541992, - "learning_rate": 1.9952854984114097e-05, - "loss": 1.3995, - "step": 171 - }, - { - "epoch": 0.13017975402081364, - "grad_norm": 6.231164932250977, - "learning_rate": 1.9952073133270288e-05, - "loss": 1.3826, - "step": 172 - }, - { - "epoch": 0.13093661305581836, - "grad_norm": 3.969299554824829, - "learning_rate": 1.9951284868116495e-05, - "loss": 1.427, - "step": 173 - }, - { - "epoch": 0.13169347209082308, - "grad_norm": 5.056988716125488, - "learning_rate": 1.9950490189160818e-05, - "loss": 1.4377, - "step": 174 - }, - { - "epoch": 0.13245033112582782, - "grad_norm": 3.9641916751861572, - "learning_rate": 1.99496890969155e-05, - "loss": 1.3729, - "step": 175 - }, - { - "epoch": 0.13320719016083254, - "grad_norm": 4.119785308837891, - "learning_rate": 1.9948881591896913e-05, - "loss": 1.4061, - "step": 176 - }, - { - "epoch": 0.13396404919583726, - "grad_norm": 4.154798984527588, - "learning_rate": 1.9948067674625557e-05, - "loss": 1.4383, - "step": 177 - }, - { - "epoch": 0.134720908230842, - "grad_norm": 4.396413326263428, - "learning_rate": 1.994724734562607e-05, - "loss": 1.3806, - "step": 178 - }, - { - "epoch": 0.13547776726584673, - "grad_norm": 10.802559852600098, - "learning_rate": 1.9946420605427235e-05, - "loss": 1.4279, - "step": 179 - }, - { - "epoch": 0.13623462630085148, - "grad_norm": 4.602297782897949, - "learning_rate": 1.9945587454561944e-05, - "loss": 1.3618, - "step": 180 - }, - { - "epoch": 0.1369914853358562, - "grad_norm": 4.874974727630615, - "learning_rate": 1.994474789356724e-05, - "loss": 1.3582, - "step": 181 - }, - { - "epoch": 0.13774834437086092, - "grad_norm": 5.023828983306885, - "learning_rate": 1.994390192298429e-05, - "loss": 1.3445, - "step": 182 - }, - { - "epoch": 0.13850520340586567, - "grad_norm": 4.938666343688965, - "learning_rate": 1.994304954335839e-05, - "loss": 1.4221, - "step": 183 - }, - { - "epoch": 0.1392620624408704, - "grad_norm": 5.975377559661865, - "learning_rate": 1.9942190755238973e-05, - "loss": 1.3947, - "step": 184 - }, - { - "epoch": 0.1400189214758751, - "grad_norm": 8.078311920166016, - "learning_rate": 1.9941325559179608e-05, - "loss": 1.3925, - "step": 185 - }, - { - "epoch": 0.14077578051087986, - "grad_norm": 5.0124897956848145, - "learning_rate": 1.9940453955737976e-05, - "loss": 1.3958, - "step": 186 - }, - { - "epoch": 0.14153263954588458, - "grad_norm": 4.94537353515625, - "learning_rate": 1.9939575945475905e-05, - "loss": 1.3855, - "step": 187 - }, - { - "epoch": 0.14228949858088932, - "grad_norm": 5.828818321228027, - "learning_rate": 1.9938691528959348e-05, - "loss": 1.4567, - 
"step": 188 - }, - { - "epoch": 0.14304635761589404, - "grad_norm": 4.672356605529785, - "learning_rate": 1.993780070675838e-05, - "loss": 1.3581, - "step": 189 - }, - { - "epoch": 0.14380321665089876, - "grad_norm": 5.052429676055908, - "learning_rate": 1.993690347944722e-05, - "loss": 1.3874, - "step": 190 - }, - { - "epoch": 0.1445600756859035, - "grad_norm": 4.454349040985107, - "learning_rate": 1.9935999847604204e-05, - "loss": 1.4282, - "step": 191 - }, - { - "epoch": 0.14531693472090823, - "grad_norm": 4.81812858581543, - "learning_rate": 1.9935089811811794e-05, - "loss": 1.4103, - "step": 192 - }, - { - "epoch": 0.14607379375591295, - "grad_norm": 3.8706412315368652, - "learning_rate": 1.993417337265659e-05, - "loss": 1.4024, - "step": 193 - }, - { - "epoch": 0.1468306527909177, - "grad_norm": 3.948594093322754, - "learning_rate": 1.9933250530729314e-05, - "loss": 1.387, - "step": 194 - }, - { - "epoch": 0.14758751182592242, - "grad_norm": 4.73719596862793, - "learning_rate": 1.993232128662482e-05, - "loss": 1.4528, - "step": 195 - }, - { - "epoch": 0.14834437086092717, - "grad_norm": 3.9017584323883057, - "learning_rate": 1.993138564094208e-05, - "loss": 1.4245, - "step": 196 - }, - { - "epoch": 0.14910122989593189, - "grad_norm": 6.6446309089660645, - "learning_rate": 1.9930443594284193e-05, - "loss": 1.4046, - "step": 197 - }, - { - "epoch": 0.1498580889309366, - "grad_norm": 4.191623210906982, - "learning_rate": 1.9929495147258395e-05, - "loss": 1.3987, - "step": 198 - }, - { - "epoch": 0.15061494796594135, - "grad_norm": 3.8362607955932617, - "learning_rate": 1.992854030047604e-05, - "loss": 1.3583, - "step": 199 - }, - { - "epoch": 0.15137180700094607, - "grad_norm": 4.051894187927246, - "learning_rate": 1.9927579054552603e-05, - "loss": 1.3856, - "step": 200 - }, - { - "epoch": 0.1521286660359508, - "grad_norm": 3.792412281036377, - "learning_rate": 1.992661141010769e-05, - "loss": 1.3961, - "step": 201 - }, - { - "epoch": 0.15288552507095554, - "grad_norm": 3.697641134262085, - "learning_rate": 1.992563736776503e-05, - "loss": 1.3808, - "step": 202 - }, - { - "epoch": 0.15364238410596026, - "grad_norm": 4.134721279144287, - "learning_rate": 1.992465692815248e-05, - "loss": 1.3594, - "step": 203 - }, - { - "epoch": 0.154399243140965, - "grad_norm": 4.171304225921631, - "learning_rate": 1.9923670091902013e-05, - "loss": 1.4217, - "step": 204 - }, - { - "epoch": 0.15515610217596973, - "grad_norm": 3.476039171218872, - "learning_rate": 1.992267685964973e-05, - "loss": 1.3967, - "step": 205 - }, - { - "epoch": 0.15591296121097445, - "grad_norm": 3.4347240924835205, - "learning_rate": 1.9921677232035846e-05, - "loss": 1.3422, - "step": 206 - }, - { - "epoch": 0.1566698202459792, - "grad_norm": 3.7200000286102295, - "learning_rate": 1.992067120970472e-05, - "loss": 1.3538, - "step": 207 - }, - { - "epoch": 0.15742667928098392, - "grad_norm": 3.8184263706207275, - "learning_rate": 1.9919658793304804e-05, - "loss": 1.3956, - "step": 208 - }, - { - "epoch": 0.15818353831598864, - "grad_norm": 3.761478900909424, - "learning_rate": 1.9918639983488694e-05, - "loss": 1.4233, - "step": 209 - }, - { - "epoch": 0.15894039735099338, - "grad_norm": 3.587502956390381, - "learning_rate": 1.99176147809131e-05, - "loss": 1.3514, - "step": 210 - }, - { - "epoch": 0.1596972563859981, - "grad_norm": 3.3828699588775635, - "learning_rate": 1.9916583186238847e-05, - "loss": 1.3766, - "step": 211 - }, - { - "epoch": 0.16045411542100285, - "grad_norm": 3.2444939613342285, - "learning_rate": 
1.9915545200130893e-05, - "loss": 1.4051, - "step": 212 - }, - { - "epoch": 0.16121097445600757, - "grad_norm": 3.4360880851745605, - "learning_rate": 1.9914500823258298e-05, - "loss": 1.3364, - "step": 213 - }, - { - "epoch": 0.1619678334910123, - "grad_norm": 3.3002805709838867, - "learning_rate": 1.9913450056294255e-05, - "loss": 1.3807, - "step": 214 - }, - { - "epoch": 0.16272469252601704, - "grad_norm": 3.551203489303589, - "learning_rate": 1.991239289991608e-05, - "loss": 1.4077, - "step": 215 - }, - { - "epoch": 0.16348155156102176, - "grad_norm": 2.9857335090637207, - "learning_rate": 1.991132935480519e-05, - "loss": 1.3667, - "step": 216 - }, - { - "epoch": 0.16423841059602648, - "grad_norm": 3.935084342956543, - "learning_rate": 1.9910259421647136e-05, - "loss": 1.3973, - "step": 217 - }, - { - "epoch": 0.16499526963103123, - "grad_norm": 3.209479570388794, - "learning_rate": 1.9909183101131576e-05, - "loss": 1.3752, - "step": 218 - }, - { - "epoch": 0.16575212866603595, - "grad_norm": 3.311500072479248, - "learning_rate": 1.9908100393952293e-05, - "loss": 1.3566, - "step": 219 - }, - { - "epoch": 0.1665089877010407, - "grad_norm": 3.0751259326934814, - "learning_rate": 1.990701130080718e-05, - "loss": 1.411, - "step": 220 - }, - { - "epoch": 0.16726584673604541, - "grad_norm": 3.3133180141448975, - "learning_rate": 1.9905915822398257e-05, - "loss": 1.4006, - "step": 221 - }, - { - "epoch": 0.16802270577105013, - "grad_norm": 3.2017252445220947, - "learning_rate": 1.9904813959431646e-05, - "loss": 1.4028, - "step": 222 - }, - { - "epoch": 0.16877956480605488, - "grad_norm": 3.404691219329834, - "learning_rate": 1.9903705712617595e-05, - "loss": 1.355, - "step": 223 - }, - { - "epoch": 0.1695364238410596, - "grad_norm": 3.1049623489379883, - "learning_rate": 1.990259108267046e-05, - "loss": 1.3305, - "step": 224 - }, - { - "epoch": 0.17029328287606432, - "grad_norm": 3.3933444023132324, - "learning_rate": 1.990147007030871e-05, - "loss": 1.3718, - "step": 225 - }, - { - "epoch": 0.17105014191106907, - "grad_norm": 3.479591131210327, - "learning_rate": 1.9900342676254945e-05, - "loss": 1.393, - "step": 226 - }, - { - "epoch": 0.1718070009460738, - "grad_norm": 3.3810219764709473, - "learning_rate": 1.989920890123586e-05, - "loss": 1.3864, - "step": 227 - }, - { - "epoch": 0.17256385998107854, - "grad_norm": 3.4179928302764893, - "learning_rate": 1.9898068745982263e-05, - "loss": 1.3322, - "step": 228 - }, - { - "epoch": 0.17332071901608326, - "grad_norm": 3.288922071456909, - "learning_rate": 1.9896922211229088e-05, - "loss": 1.3738, - "step": 229 - }, - { - "epoch": 0.17407757805108798, - "grad_norm": 3.4045164585113525, - "learning_rate": 1.9895769297715373e-05, - "loss": 1.3509, - "step": 230 - }, - { - "epoch": 0.17483443708609273, - "grad_norm": 3.384779453277588, - "learning_rate": 1.9894610006184264e-05, - "loss": 1.3596, - "step": 231 - }, - { - "epoch": 0.17559129612109745, - "grad_norm": 3.6631815433502197, - "learning_rate": 1.989344433738303e-05, - "loss": 1.4126, - "step": 232 - }, - { - "epoch": 0.17634815515610217, - "grad_norm": 3.1958444118499756, - "learning_rate": 1.9892272292063034e-05, - "loss": 1.3711, - "step": 233 - }, - { - "epoch": 0.1771050141911069, - "grad_norm": 3.4087891578674316, - "learning_rate": 1.989109387097977e-05, - "loss": 1.3604, - "step": 234 - }, - { - "epoch": 0.17786187322611163, - "grad_norm": 3.5950968265533447, - "learning_rate": 1.988990907489282e-05, - "loss": 1.3464, - "step": 235 - }, - { - "epoch": 0.17861873226111638, - 
"grad_norm": 3.5223278999328613, - "learning_rate": 1.988871790456589e-05, - "loss": 1.3965, - "step": 236 - }, - { - "epoch": 0.1793755912961211, - "grad_norm": 3.259669780731201, - "learning_rate": 1.988752036076679e-05, - "loss": 1.3915, - "step": 237 - }, - { - "epoch": 0.18013245033112582, - "grad_norm": 3.1698622703552246, - "learning_rate": 1.9886316444267436e-05, - "loss": 1.3674, - "step": 238 - }, - { - "epoch": 0.18088930936613057, - "grad_norm": 3.336416482925415, - "learning_rate": 1.9885106155843857e-05, - "loss": 1.3476, - "step": 239 - }, - { - "epoch": 0.1816461684011353, - "grad_norm": 3.2986626625061035, - "learning_rate": 1.9883889496276188e-05, - "loss": 1.3139, - "step": 240 - }, - { - "epoch": 0.18240302743614, - "grad_norm": 3.2197721004486084, - "learning_rate": 1.9882666466348665e-05, - "loss": 1.3611, - "step": 241 - }, - { - "epoch": 0.18315988647114476, - "grad_norm": 3.120088815689087, - "learning_rate": 1.988143706684964e-05, - "loss": 1.364, - "step": 242 - }, - { - "epoch": 0.18391674550614948, - "grad_norm": 2.9464049339294434, - "learning_rate": 1.9880201298571558e-05, - "loss": 1.3295, - "step": 243 - }, - { - "epoch": 0.18467360454115422, - "grad_norm": 3.3369717597961426, - "learning_rate": 1.9878959162310983e-05, - "loss": 1.3669, - "step": 244 - }, - { - "epoch": 0.18543046357615894, - "grad_norm": 2.900787353515625, - "learning_rate": 1.987771065886857e-05, - "loss": 1.3565, - "step": 245 - }, - { - "epoch": 0.18618732261116366, - "grad_norm": 3.0211544036865234, - "learning_rate": 1.9876455789049096e-05, - "loss": 1.3882, - "step": 246 - }, - { - "epoch": 0.1869441816461684, - "grad_norm": 3.1576292514801025, - "learning_rate": 1.9875194553661415e-05, - "loss": 1.3075, - "step": 247 - }, - { - "epoch": 0.18770104068117313, - "grad_norm": 2.8850550651550293, - "learning_rate": 1.9873926953518515e-05, - "loss": 1.3665, - "step": 248 - }, - { - "epoch": 0.18845789971617785, - "grad_norm": 3.188582420349121, - "learning_rate": 1.9872652989437467e-05, - "loss": 1.3555, - "step": 249 - }, - { - "epoch": 0.1892147587511826, - "grad_norm": 4.313934803009033, - "learning_rate": 1.9871372662239446e-05, - "loss": 1.2937, - "step": 250 - }, - { - "epoch": 0.18997161778618732, - "grad_norm": 3.0744991302490234, - "learning_rate": 1.9870085972749733e-05, - "loss": 1.3289, - "step": 251 - }, - { - "epoch": 0.19072847682119207, - "grad_norm": 2.9217262268066406, - "learning_rate": 1.986879292179771e-05, - "loss": 1.33, - "step": 252 - }, - { - "epoch": 0.1914853358561968, - "grad_norm": 3.089919328689575, - "learning_rate": 1.986749351021686e-05, - "loss": 1.3532, - "step": 253 - }, - { - "epoch": 0.1922421948912015, - "grad_norm": 3.5609021186828613, - "learning_rate": 1.9866187738844753e-05, - "loss": 1.4002, - "step": 254 - }, - { - "epoch": 0.19299905392620625, - "grad_norm": 3.0856025218963623, - "learning_rate": 1.986487560852308e-05, - "loss": 1.3691, - "step": 255 - }, - { - "epoch": 0.19375591296121097, - "grad_norm": 2.679279327392578, - "learning_rate": 1.986355712009762e-05, - "loss": 1.3412, - "step": 256 - }, - { - "epoch": 0.1945127719962157, - "grad_norm": 3.1083905696868896, - "learning_rate": 1.9862232274418246e-05, - "loss": 1.3023, - "step": 257 - }, - { - "epoch": 0.19526963103122044, - "grad_norm": 2.726358413696289, - "learning_rate": 1.9860901072338936e-05, - "loss": 1.377, - "step": 258 - }, - { - "epoch": 0.19602649006622516, - "grad_norm": 2.966639995574951, - "learning_rate": 1.985956351471776e-05, - "loss": 1.304, - "step": 259 - }, - 
{ - "epoch": 0.1967833491012299, - "grad_norm": 2.8776400089263916, - "learning_rate": 1.9858219602416887e-05, - "loss": 1.3481, - "step": 260 - }, - { - "epoch": 0.19754020813623463, - "grad_norm": 3.0099427700042725, - "learning_rate": 1.9856869336302588e-05, - "loss": 1.4332, - "step": 261 - }, - { - "epoch": 0.19829706717123935, - "grad_norm": 3.146959066390991, - "learning_rate": 1.985551271724522e-05, - "loss": 1.3372, - "step": 262 - }, - { - "epoch": 0.1990539262062441, - "grad_norm": 3.076327323913574, - "learning_rate": 1.9854149746119232e-05, - "loss": 1.3258, - "step": 263 - }, - { - "epoch": 0.19981078524124882, - "grad_norm": 2.8409347534179688, - "learning_rate": 1.9852780423803187e-05, - "loss": 1.2975, - "step": 264 - }, - { - "epoch": 0.20056764427625354, - "grad_norm": 3.1386849880218506, - "learning_rate": 1.9851404751179723e-05, - "loss": 1.3395, - "step": 265 - }, - { - "epoch": 0.20132450331125828, - "grad_norm": 3.104682445526123, - "learning_rate": 1.9850022729135578e-05, - "loss": 1.3667, - "step": 266 - }, - { - "epoch": 0.202081362346263, - "grad_norm": 3.337529182434082, - "learning_rate": 1.9848634358561584e-05, - "loss": 1.3145, - "step": 267 - }, - { - "epoch": 0.20283822138126775, - "grad_norm": 3.380446195602417, - "learning_rate": 1.984723964035266e-05, - "loss": 1.364, - "step": 268 - }, - { - "epoch": 0.20359508041627247, - "grad_norm": 3.161867141723633, - "learning_rate": 1.9845838575407824e-05, - "loss": 1.3333, - "step": 269 - }, - { - "epoch": 0.2043519394512772, - "grad_norm": 3.323434352874756, - "learning_rate": 1.9844431164630178e-05, - "loss": 1.3897, - "step": 270 - }, - { - "epoch": 0.20510879848628194, - "grad_norm": 3.4208099842071533, - "learning_rate": 1.984301740892692e-05, - "loss": 1.333, - "step": 271 - }, - { - "epoch": 0.20586565752128666, - "grad_norm": 3.178248643875122, - "learning_rate": 1.984159730920933e-05, - "loss": 1.3033, - "step": 272 - }, - { - "epoch": 0.20662251655629138, - "grad_norm": 3.0145297050476074, - "learning_rate": 1.9840170866392795e-05, - "loss": 1.3055, - "step": 273 - }, - { - "epoch": 0.20737937559129613, - "grad_norm": 3.6076059341430664, - "learning_rate": 1.9838738081396764e-05, - "loss": 1.3442, - "step": 274 - }, - { - "epoch": 0.20813623462630085, - "grad_norm": 3.3622937202453613, - "learning_rate": 1.9837298955144796e-05, - "loss": 1.3666, - "step": 275 - }, - { - "epoch": 0.2088930936613056, - "grad_norm": 3.782317876815796, - "learning_rate": 1.9835853488564527e-05, - "loss": 1.3791, - "step": 276 - }, - { - "epoch": 0.20964995269631032, - "grad_norm": 3.1874301433563232, - "learning_rate": 1.9834401682587688e-05, - "loss": 1.3703, - "step": 277 - }, - { - "epoch": 0.21040681173131504, - "grad_norm": 3.0065550804138184, - "learning_rate": 1.9832943538150083e-05, - "loss": 1.331, - "step": 278 - }, - { - "epoch": 0.21116367076631978, - "grad_norm": 3.953733444213867, - "learning_rate": 1.9831479056191618e-05, - "loss": 1.3855, - "step": 279 - }, - { - "epoch": 0.2119205298013245, - "grad_norm": 3.682438611984253, - "learning_rate": 1.983000823765627e-05, - "loss": 1.3605, - "step": 280 - }, - { - "epoch": 0.21267738883632922, - "grad_norm": 3.57037615776062, - "learning_rate": 1.9828531083492102e-05, - "loss": 1.3048, - "step": 281 - }, - { - "epoch": 0.21343424787133397, - "grad_norm": 3.4117233753204346, - "learning_rate": 1.9827047594651275e-05, - "loss": 1.3606, - "step": 282 - }, - { - "epoch": 0.2141911069063387, - "grad_norm": 2.914785623550415, - "learning_rate": 1.982555777209002e-05, 
- "loss": 1.3596, - "step": 283 - }, - { - "epoch": 0.21494796594134344, - "grad_norm": 3.271235942840576, - "learning_rate": 1.9824061616768652e-05, - "loss": 1.3208, - "step": 284 - }, - { - "epoch": 0.21570482497634816, - "grad_norm": 3.3142642974853516, - "learning_rate": 1.982255912965157e-05, - "loss": 1.3574, - "step": 285 - }, - { - "epoch": 0.21646168401135288, - "grad_norm": 3.752458095550537, - "learning_rate": 1.9821050311707253e-05, - "loss": 1.3818, - "step": 286 - }, - { - "epoch": 0.21721854304635763, - "grad_norm": 3.1010730266571045, - "learning_rate": 1.9819535163908266e-05, - "loss": 1.2799, - "step": 287 - }, - { - "epoch": 0.21797540208136235, - "grad_norm": 3.3089754581451416, - "learning_rate": 1.9818013687231252e-05, - "loss": 1.3719, - "step": 288 - }, - { - "epoch": 0.21873226111636707, - "grad_norm": 3.800584316253662, - "learning_rate": 1.9816485882656925e-05, - "loss": 1.3458, - "step": 289 - }, - { - "epoch": 0.2194891201513718, - "grad_norm": 3.5390021800994873, - "learning_rate": 1.9814951751170087e-05, - "loss": 1.3558, - "step": 290 - }, - { - "epoch": 0.22024597918637653, - "grad_norm": 3.37929630279541, - "learning_rate": 1.9813411293759618e-05, - "loss": 1.3236, - "step": 291 - }, - { - "epoch": 0.22100283822138128, - "grad_norm": 3.255699872970581, - "learning_rate": 1.9811864511418467e-05, - "loss": 1.3245, - "step": 292 - }, - { - "epoch": 0.221759697256386, - "grad_norm": 3.8194658756256104, - "learning_rate": 1.981031140514367e-05, - "loss": 1.3381, - "step": 293 - }, - { - "epoch": 0.22251655629139072, - "grad_norm": 3.8124804496765137, - "learning_rate": 1.9808751975936344e-05, - "loss": 1.3006, - "step": 294 - }, - { - "epoch": 0.22327341532639547, - "grad_norm": 3.637120246887207, - "learning_rate": 1.980718622480166e-05, - "loss": 1.3411, - "step": 295 - }, - { - "epoch": 0.2240302743614002, - "grad_norm": 3.8235883712768555, - "learning_rate": 1.9805614152748887e-05, - "loss": 1.3285, - "step": 296 - }, - { - "epoch": 0.2247871333964049, - "grad_norm": 3.892608642578125, - "learning_rate": 1.980403576079135e-05, - "loss": 1.3015, - "step": 297 - }, - { - "epoch": 0.22554399243140966, - "grad_norm": 3.9942359924316406, - "learning_rate": 1.9802451049946468e-05, - "loss": 1.3404, - "step": 298 - }, - { - "epoch": 0.22630085146641438, - "grad_norm": 3.8982861042022705, - "learning_rate": 1.9800860021235708e-05, - "loss": 1.3194, - "step": 299 - }, - { - "epoch": 0.22705771050141912, - "grad_norm": 4.402480125427246, - "learning_rate": 1.979926267568463e-05, - "loss": 1.3383, - "step": 300 - }, - { - "epoch": 0.22781456953642384, - "grad_norm": 3.9718708992004395, - "learning_rate": 1.979765901432286e-05, - "loss": 1.301, - "step": 301 - }, - { - "epoch": 0.22857142857142856, - "grad_norm": 3.8678481578826904, - "learning_rate": 1.979604903818409e-05, - "loss": 1.3269, - "step": 302 - }, - { - "epoch": 0.2293282876064333, - "grad_norm": 3.352957010269165, - "learning_rate": 1.979443274830609e-05, - "loss": 1.3332, - "step": 303 - }, - { - "epoch": 0.23008514664143803, - "grad_norm": 3.937535524368286, - "learning_rate": 1.9792810145730696e-05, - "loss": 1.3464, - "step": 304 - }, - { - "epoch": 0.23084200567644275, - "grad_norm": 4.081162929534912, - "learning_rate": 1.9791181231503804e-05, - "loss": 1.327, - "step": 305 - }, - { - "epoch": 0.2315988647114475, - "grad_norm": 3.5600180625915527, - "learning_rate": 1.97895460066754e-05, - "loss": 1.3536, - "step": 306 - }, - { - "epoch": 0.23235572374645222, - "grad_norm": 3.9321706295013428, - 
"learning_rate": 1.9787904472299512e-05, - "loss": 1.2999, - "step": 307 - }, - { - "epoch": 0.23311258278145697, - "grad_norm": 4.384609699249268, - "learning_rate": 1.978625662943426e-05, - "loss": 1.3461, - "step": 308 - }, - { - "epoch": 0.2338694418164617, - "grad_norm": 4.421790599822998, - "learning_rate": 1.978460247914181e-05, - "loss": 1.3324, - "step": 309 - }, - { - "epoch": 0.2346263008514664, - "grad_norm": 4.101651191711426, - "learning_rate": 1.9782942022488404e-05, - "loss": 1.2738, - "step": 310 - }, - { - "epoch": 0.23538315988647115, - "grad_norm": 4.219285488128662, - "learning_rate": 1.978127526054435e-05, - "loss": 1.3519, - "step": 311 - }, - { - "epoch": 0.23614001892147587, - "grad_norm": 3.5981838703155518, - "learning_rate": 1.9779602194384014e-05, - "loss": 1.3546, - "step": 312 - }, - { - "epoch": 0.2368968779564806, - "grad_norm": 3.758359432220459, - "learning_rate": 1.9777922825085835e-05, - "loss": 1.3264, - "step": 313 - }, - { - "epoch": 0.23765373699148534, - "grad_norm": 3.7645103931427, - "learning_rate": 1.97762371537323e-05, - "loss": 1.3135, - "step": 314 - }, - { - "epoch": 0.23841059602649006, - "grad_norm": 3.3905699253082275, - "learning_rate": 1.9774545181409973e-05, - "loss": 1.2848, - "step": 315 - }, - { - "epoch": 0.2391674550614948, - "grad_norm": 3.6732635498046875, - "learning_rate": 1.9772846909209473e-05, - "loss": 1.3487, - "step": 316 - }, - { - "epoch": 0.23992431409649953, - "grad_norm": 3.8122737407684326, - "learning_rate": 1.9771142338225476e-05, - "loss": 1.333, - "step": 317 - }, - { - "epoch": 0.24068117313150425, - "grad_norm": 4.025964260101318, - "learning_rate": 1.9769431469556728e-05, - "loss": 1.3431, - "step": 318 - }, - { - "epoch": 0.241438032166509, - "grad_norm": 3.054323196411133, - "learning_rate": 1.9767714304306024e-05, - "loss": 1.3279, - "step": 319 - }, - { - "epoch": 0.24219489120151372, - "grad_norm": 4.698709964752197, - "learning_rate": 1.9765990843580227e-05, - "loss": 1.3209, - "step": 320 - }, - { - "epoch": 0.24295175023651844, - "grad_norm": 3.341327428817749, - "learning_rate": 1.976426108849025e-05, - "loss": 1.3424, - "step": 321 - }, - { - "epoch": 0.24370860927152319, - "grad_norm": 3.9361190795898438, - "learning_rate": 1.9762525040151074e-05, - "loss": 1.3083, - "step": 322 - }, - { - "epoch": 0.2444654683065279, - "grad_norm": 3.340085506439209, - "learning_rate": 1.9760782699681716e-05, - "loss": 1.3358, - "step": 323 - }, - { - "epoch": 0.24522232734153265, - "grad_norm": 3.044618606567383, - "learning_rate": 1.9759034068205273e-05, - "loss": 1.3099, - "step": 324 - }, - { - "epoch": 0.24597918637653737, - "grad_norm": 3.619760274887085, - "learning_rate": 1.9757279146848883e-05, - "loss": 1.3455, - "step": 325 - }, - { - "epoch": 0.2467360454115421, - "grad_norm": 3.7121100425720215, - "learning_rate": 1.975551793674374e-05, - "loss": 1.3106, - "step": 326 - }, - { - "epoch": 0.24749290444654684, - "grad_norm": 3.5931692123413086, - "learning_rate": 1.9753750439025095e-05, - "loss": 1.2905, - "step": 327 - }, - { - "epoch": 0.24824976348155156, - "grad_norm": 3.603030204772949, - "learning_rate": 1.975197665483225e-05, - "loss": 1.3319, - "step": 328 - }, - { - "epoch": 0.24900662251655628, - "grad_norm": 3.6277918815612793, - "learning_rate": 1.9750196585308564e-05, - "loss": 1.3393, - "step": 329 - }, - { - "epoch": 0.24976348155156103, - "grad_norm": 3.5887362957000732, - "learning_rate": 1.974841023160143e-05, - "loss": 1.3866, - "step": 330 - }, - { - "epoch": 0.25052034058656575, - 
"grad_norm": 3.4283299446105957, - "learning_rate": 1.974661759486232e-05, - "loss": 1.329, - "step": 331 - }, - { - "epoch": 0.25127719962157047, - "grad_norm": 3.7355992794036865, - "learning_rate": 1.9744818676246724e-05, - "loss": 1.3129, - "step": 332 - }, - { - "epoch": 0.2520340586565752, - "grad_norm": 3.726663589477539, - "learning_rate": 1.974301347691421e-05, - "loss": 1.3665, - "step": 333 - }, - { - "epoch": 0.25279091769157996, - "grad_norm": 3.93129825592041, - "learning_rate": 1.9741201998028377e-05, - "loss": 1.3876, - "step": 334 - }, - { - "epoch": 0.2535477767265847, - "grad_norm": 3.588931083679199, - "learning_rate": 1.9739384240756873e-05, - "loss": 1.3715, - "step": 335 - }, - { - "epoch": 0.2543046357615894, - "grad_norm": 3.4406232833862305, - "learning_rate": 1.9737560206271404e-05, - "loss": 1.3013, - "step": 336 - }, - { - "epoch": 0.2550614947965941, - "grad_norm": 3.481201171875, - "learning_rate": 1.9735729895747714e-05, - "loss": 1.3625, - "step": 337 - }, - { - "epoch": 0.25581835383159884, - "grad_norm": 3.7452211380004883, - "learning_rate": 1.973389331036559e-05, - "loss": 1.3452, - "step": 338 - }, - { - "epoch": 0.2565752128666036, - "grad_norm": 3.8469581604003906, - "learning_rate": 1.973205045130887e-05, - "loss": 1.3824, - "step": 339 - }, - { - "epoch": 0.25733207190160834, - "grad_norm": 3.252890110015869, - "learning_rate": 1.9730201319765423e-05, - "loss": 1.311, - "step": 340 - }, - { - "epoch": 0.25808893093661306, - "grad_norm": 3.9583048820495605, - "learning_rate": 1.9728345916927187e-05, - "loss": 1.3244, - "step": 341 - }, - { - "epoch": 0.2588457899716178, - "grad_norm": 3.6613519191741943, - "learning_rate": 1.9726484243990115e-05, - "loss": 1.3539, - "step": 342 - }, - { - "epoch": 0.2596026490066225, - "grad_norm": 3.4180917739868164, - "learning_rate": 1.9724616302154218e-05, - "loss": 1.3353, - "step": 343 - }, - { - "epoch": 0.2603595080416273, - "grad_norm": 3.7470951080322266, - "learning_rate": 1.9722742092623536e-05, - "loss": 1.2864, - "step": 344 - }, - { - "epoch": 0.261116367076632, - "grad_norm": 4.141618251800537, - "learning_rate": 1.9720861616606165e-05, - "loss": 1.3486, - "step": 345 - }, - { - "epoch": 0.2618732261116367, - "grad_norm": 3.7161524295806885, - "learning_rate": 1.9718974875314226e-05, - "loss": 1.339, - "step": 346 - }, - { - "epoch": 0.26263008514664143, - "grad_norm": 4.011509895324707, - "learning_rate": 1.9717081869963887e-05, - "loss": 1.4027, - "step": 347 - }, - { - "epoch": 0.26338694418164615, - "grad_norm": 4.976902008056641, - "learning_rate": 1.9715182601775348e-05, - "loss": 1.3078, - "step": 348 - }, - { - "epoch": 0.2641438032166509, - "grad_norm": 3.8435733318328857, - "learning_rate": 1.9713277071972844e-05, - "loss": 1.3013, - "step": 349 - }, - { - "epoch": 0.26490066225165565, - "grad_norm": 3.3969762325286865, - "learning_rate": 1.971136528178466e-05, - "loss": 1.3078, - "step": 350 - }, - { - "epoch": 0.26565752128666037, - "grad_norm": 4.123608112335205, - "learning_rate": 1.9709447232443096e-05, - "loss": 1.3476, - "step": 351 - }, - { - "epoch": 0.2664143803216651, - "grad_norm": 3.974820137023926, - "learning_rate": 1.9707522925184507e-05, - "loss": 1.377, - "step": 352 - }, - { - "epoch": 0.2671712393566698, - "grad_norm": 4.08565616607666, - "learning_rate": 1.9705592361249267e-05, - "loss": 1.3559, - "step": 353 - }, - { - "epoch": 0.26792809839167453, - "grad_norm": 3.7338943481445312, - "learning_rate": 1.970365554188179e-05, - "loss": 1.2845, - "step": 354 - }, - { - 
"epoch": 0.2686849574266793, - "grad_norm": 3.806567430496216, - "learning_rate": 1.9701712468330518e-05, - "loss": 1.4283, - "step": 355 - }, - { - "epoch": 0.269441816461684, - "grad_norm": 3.4662294387817383, - "learning_rate": 1.9699763141847928e-05, - "loss": 1.3068, - "step": 356 - }, - { - "epoch": 0.27019867549668874, - "grad_norm": 3.5118749141693115, - "learning_rate": 1.9697807563690522e-05, - "loss": 1.266, - "step": 357 - }, - { - "epoch": 0.27095553453169346, - "grad_norm": 4.166219711303711, - "learning_rate": 1.969584573511885e-05, - "loss": 1.3355, - "step": 358 - }, - { - "epoch": 0.2717123935666982, - "grad_norm": 3.828523635864258, - "learning_rate": 1.969387765739746e-05, - "loss": 1.2712, - "step": 359 - }, - { - "epoch": 0.27246925260170296, - "grad_norm": 3.8785219192504883, - "learning_rate": 1.969190333179495e-05, - "loss": 1.2761, - "step": 360 - }, - { - "epoch": 0.2732261116367077, - "grad_norm": 3.772268056869507, - "learning_rate": 1.9689922759583947e-05, - "loss": 1.372, - "step": 361 - }, - { - "epoch": 0.2739829706717124, - "grad_norm": 3.7379493713378906, - "learning_rate": 1.968793594204109e-05, - "loss": 1.2843, - "step": 362 - }, - { - "epoch": 0.2747398297067171, - "grad_norm": 4.294455051422119, - "learning_rate": 1.9685942880447054e-05, - "loss": 1.3069, - "step": 363 - }, - { - "epoch": 0.27549668874172184, - "grad_norm": 4.1428728103637695, - "learning_rate": 1.9683943576086536e-05, - "loss": 1.366, - "step": 364 - }, - { - "epoch": 0.27625354777672656, - "grad_norm": 3.9030814170837402, - "learning_rate": 1.9681938030248257e-05, - "loss": 1.342, - "step": 365 - }, - { - "epoch": 0.27701040681173134, - "grad_norm": 4.4898681640625, - "learning_rate": 1.967992624422496e-05, - "loss": 1.2735, - "step": 366 - }, - { - "epoch": 0.27776726584673606, - "grad_norm": 4.548799514770508, - "learning_rate": 1.9677908219313414e-05, - "loss": 1.3589, - "step": 367 - }, - { - "epoch": 0.2785241248817408, - "grad_norm": 4.4808478355407715, - "learning_rate": 1.9675883956814403e-05, - "loss": 1.373, - "step": 368 - }, - { - "epoch": 0.2792809839167455, - "grad_norm": 4.146103858947754, - "learning_rate": 1.967385345803274e-05, - "loss": 1.2748, - "step": 369 - }, - { - "epoch": 0.2800378429517502, - "grad_norm": 5.006552696228027, - "learning_rate": 1.9671816724277254e-05, - "loss": 1.2852, - "step": 370 - }, - { - "epoch": 0.280794701986755, - "grad_norm": 4.279321670532227, - "learning_rate": 1.966977375686079e-05, - "loss": 1.3634, - "step": 371 - }, - { - "epoch": 0.2815515610217597, - "grad_norm": 5.318479537963867, - "learning_rate": 1.9667724557100214e-05, - "loss": 1.3184, - "step": 372 - }, - { - "epoch": 0.28230842005676443, - "grad_norm": 4.354931354522705, - "learning_rate": 1.966566912631641e-05, - "loss": 1.3018, - "step": 373 - }, - { - "epoch": 0.28306527909176915, - "grad_norm": 3.5126800537109375, - "learning_rate": 1.9663607465834275e-05, - "loss": 1.2811, - "step": 374 - }, - { - "epoch": 0.28382213812677387, - "grad_norm": 4.875300407409668, - "learning_rate": 1.9661539576982728e-05, - "loss": 1.3238, - "step": 375 - }, - { - "epoch": 0.28457899716177865, - "grad_norm": 4.699173450469971, - "learning_rate": 1.9659465461094692e-05, - "loss": 1.3223, - "step": 376 - }, - { - "epoch": 0.28533585619678337, - "grad_norm": 3.6528842449188232, - "learning_rate": 1.9657385119507118e-05, - "loss": 1.292, - "step": 377 - }, - { - "epoch": 0.2860927152317881, - "grad_norm": 3.849123239517212, - "learning_rate": 1.965529855356096e-05, - "loss": 1.3114, - 
"step": 378 - }, - { - "epoch": 0.2868495742667928, - "grad_norm": 3.7049927711486816, - "learning_rate": 1.9653205764601182e-05, - "loss": 1.3314, - "step": 379 - }, - { - "epoch": 0.2876064333017975, - "grad_norm": 4.335115909576416, - "learning_rate": 1.9651106753976768e-05, - "loss": 1.3719, - "step": 380 - }, - { - "epoch": 0.28836329233680225, - "grad_norm": 4.870954990386963, - "learning_rate": 1.964900152304071e-05, - "loss": 1.3264, - "step": 381 - }, - { - "epoch": 0.289120151371807, - "grad_norm": 4.583834648132324, - "learning_rate": 1.9646890073150005e-05, - "loss": 1.3743, - "step": 382 - }, - { - "epoch": 0.28987701040681174, - "grad_norm": 3.795956611633301, - "learning_rate": 1.964477240566566e-05, - "loss": 1.2997, - "step": 383 - }, - { - "epoch": 0.29063386944181646, - "grad_norm": 5.41873025894165, - "learning_rate": 1.9642648521952695e-05, - "loss": 1.3381, - "step": 384 - }, - { - "epoch": 0.2913907284768212, - "grad_norm": 4.2772393226623535, - "learning_rate": 1.9640518423380127e-05, - "loss": 1.3322, - "step": 385 - }, - { - "epoch": 0.2921475875118259, - "grad_norm": 10.241232872009277, - "learning_rate": 1.9638382111320996e-05, - "loss": 1.3249, - "step": 386 - }, - { - "epoch": 0.2929044465468307, - "grad_norm": 3.4204752445220947, - "learning_rate": 1.9636239587152323e-05, - "loss": 1.3295, - "step": 387 - }, - { - "epoch": 0.2936613055818354, - "grad_norm": 3.368516683578491, - "learning_rate": 1.9634090852255154e-05, - "loss": 1.3561, - "step": 388 - }, - { - "epoch": 0.2944181646168401, - "grad_norm": 3.5226809978485107, - "learning_rate": 1.9631935908014532e-05, - "loss": 1.3146, - "step": 389 - }, - { - "epoch": 0.29517502365184484, - "grad_norm": 3.446794271469116, - "learning_rate": 1.9629774755819495e-05, - "loss": 1.2973, - "step": 390 - }, - { - "epoch": 0.29593188268684956, - "grad_norm": 3.176982879638672, - "learning_rate": 1.9627607397063097e-05, - "loss": 1.3233, - "step": 391 - }, - { - "epoch": 0.29668874172185433, - "grad_norm": 3.0656180381774902, - "learning_rate": 1.9625433833142376e-05, - "loss": 1.3246, - "step": 392 - }, - { - "epoch": 0.29744560075685905, - "grad_norm": 3.4723055362701416, - "learning_rate": 1.9623254065458387e-05, - "loss": 1.3461, - "step": 393 - }, - { - "epoch": 0.29820245979186377, - "grad_norm": 3.2769827842712402, - "learning_rate": 1.962106809541616e-05, - "loss": 1.3242, - "step": 394 - }, - { - "epoch": 0.2989593188268685, - "grad_norm": 2.8769099712371826, - "learning_rate": 1.9618875924424756e-05, - "loss": 1.2548, - "step": 395 - }, - { - "epoch": 0.2997161778618732, - "grad_norm": 3.3671765327453613, - "learning_rate": 1.9616677553897204e-05, - "loss": 1.3241, - "step": 396 - }, - { - "epoch": 0.30047303689687793, - "grad_norm": 3.104637384414673, - "learning_rate": 1.9614472985250547e-05, - "loss": 1.3121, - "step": 397 - }, - { - "epoch": 0.3012298959318827, - "grad_norm": 3.6635613441467285, - "learning_rate": 1.9612262219905807e-05, - "loss": 1.3157, - "step": 398 - }, - { - "epoch": 0.3019867549668874, - "grad_norm": 3.4978229999542236, - "learning_rate": 1.9610045259288017e-05, - "loss": 1.3566, - "step": 399 - }, - { - "epoch": 0.30274361400189215, - "grad_norm": 3.084291458129883, - "learning_rate": 1.9607822104826198e-05, - "loss": 1.3157, - "step": 400 - }, - { - "epoch": 0.30350047303689687, - "grad_norm": 2.742034673690796, - "learning_rate": 1.9605592757953354e-05, - "loss": 1.2778, - "step": 401 - }, - { - "epoch": 0.3042573320719016, - "grad_norm": 2.89613938331604, - "learning_rate": 
1.960335722010649e-05, - "loss": 1.3467, - "step": 402 - }, - { - "epoch": 0.30501419110690636, - "grad_norm": 3.3919119834899902, - "learning_rate": 1.9601115492726603e-05, - "loss": 1.3264, - "step": 403 - }, - { - "epoch": 0.3057710501419111, - "grad_norm": 2.5944290161132812, - "learning_rate": 1.9598867577258672e-05, - "loss": 1.3765, - "step": 404 - }, - { - "epoch": 0.3065279091769158, - "grad_norm": 2.6674866676330566, - "learning_rate": 1.9596613475151674e-05, - "loss": 1.3077, - "step": 405 - }, - { - "epoch": 0.3072847682119205, - "grad_norm": 2.9125916957855225, - "learning_rate": 1.9594353187858567e-05, - "loss": 1.3472, - "step": 406 - }, - { - "epoch": 0.30804162724692524, - "grad_norm": 2.746316909790039, - "learning_rate": 1.9592086716836292e-05, - "loss": 1.3137, - "step": 407 - }, - { - "epoch": 0.30879848628193, - "grad_norm": 3.5916221141815186, - "learning_rate": 1.958981406354579e-05, - "loss": 1.3181, - "step": 408 - }, - { - "epoch": 0.30955534531693474, - "grad_norm": 2.9677999019622803, - "learning_rate": 1.9587535229451973e-05, - "loss": 1.3094, - "step": 409 - }, - { - "epoch": 0.31031220435193946, - "grad_norm": 2.766179084777832, - "learning_rate": 1.9585250216023746e-05, - "loss": 1.3205, - "step": 410 - }, - { - "epoch": 0.3110690633869442, - "grad_norm": 3.019426107406616, - "learning_rate": 1.9582959024733992e-05, - "loss": 1.3053, - "step": 411 - }, - { - "epoch": 0.3118259224219489, - "grad_norm": 3.580401659011841, - "learning_rate": 1.9580661657059582e-05, - "loss": 1.3685, - "step": 412 - }, - { - "epoch": 0.3125827814569536, - "grad_norm": 3.2559759616851807, - "learning_rate": 1.957835811448136e-05, - "loss": 1.2975, - "step": 413 - }, - { - "epoch": 0.3133396404919584, - "grad_norm": 3.185425281524658, - "learning_rate": 1.957604839848415e-05, - "loss": 1.3391, - "step": 414 - }, - { - "epoch": 0.3140964995269631, - "grad_norm": 3.2222900390625, - "learning_rate": 1.9573732510556772e-05, - "loss": 1.2233, - "step": 415 - }, - { - "epoch": 0.31485335856196783, - "grad_norm": 3.3176467418670654, - "learning_rate": 1.9571410452192003e-05, - "loss": 1.32, - "step": 416 - }, - { - "epoch": 0.31561021759697255, - "grad_norm": 2.996213912963867, - "learning_rate": 1.9569082224886607e-05, - "loss": 1.3158, - "step": 417 - }, - { - "epoch": 0.3163670766319773, - "grad_norm": 2.757145881652832, - "learning_rate": 1.9566747830141327e-05, - "loss": 1.2747, - "step": 418 - }, - { - "epoch": 0.31712393566698205, - "grad_norm": 3.0630686283111572, - "learning_rate": 1.9564407269460873e-05, - "loss": 1.2863, - "step": 419 - }, - { - "epoch": 0.31788079470198677, - "grad_norm": 2.979710102081299, - "learning_rate": 1.956206054435394e-05, - "loss": 1.3017, - "step": 420 - }, - { - "epoch": 0.3186376537369915, - "grad_norm": 2.9305684566497803, - "learning_rate": 1.955970765633319e-05, - "loss": 1.2655, - "step": 421 - }, - { - "epoch": 0.3193945127719962, - "grad_norm": 3.2490427494049072, - "learning_rate": 1.955734860691526e-05, - "loss": 1.312, - "step": 422 - }, - { - "epoch": 0.32015137180700093, - "grad_norm": 2.647688388824463, - "learning_rate": 1.9554983397620754e-05, - "loss": 1.3009, - "step": 423 - }, - { - "epoch": 0.3209082308420057, - "grad_norm": 2.8015365600585938, - "learning_rate": 1.9552612029974246e-05, - "loss": 1.3069, - "step": 424 - }, - { - "epoch": 0.3216650898770104, - "grad_norm": 2.606043577194214, - "learning_rate": 1.9550234505504294e-05, - "loss": 1.2951, - "step": 425 - }, - { - "epoch": 0.32242194891201514, - "grad_norm": 
2.9746274948120117, - "learning_rate": 1.9547850825743407e-05, - "loss": 1.2736, - "step": 426 - }, - { - "epoch": 0.32317880794701986, - "grad_norm": 3.0589208602905273, - "learning_rate": 1.9545460992228074e-05, - "loss": 1.3242, - "step": 427 - }, - { - "epoch": 0.3239356669820246, - "grad_norm": 3.041224956512451, - "learning_rate": 1.954306500649874e-05, - "loss": 1.3397, - "step": 428 - }, - { - "epoch": 0.3246925260170293, - "grad_norm": 2.700326681137085, - "learning_rate": 1.954066287009982e-05, - "loss": 1.2822, - "step": 429 - }, - { - "epoch": 0.3254493850520341, - "grad_norm": 2.7489256858825684, - "learning_rate": 1.95382545845797e-05, - "loss": 1.3056, - "step": 430 - }, - { - "epoch": 0.3262062440870388, - "grad_norm": 3.0966339111328125, - "learning_rate": 1.953584015149072e-05, - "loss": 1.3316, - "step": 431 - }, - { - "epoch": 0.3269631031220435, - "grad_norm": 2.661102533340454, - "learning_rate": 1.9533419572389186e-05, - "loss": 1.3017, - "step": 432 - }, - { - "epoch": 0.32771996215704824, - "grad_norm": 3.1965274810791016, - "learning_rate": 1.9530992848835367e-05, - "loss": 1.2975, - "step": 433 - }, - { - "epoch": 0.32847682119205296, - "grad_norm": 3.0282115936279297, - "learning_rate": 1.9528559982393497e-05, - "loss": 1.3261, - "step": 434 - }, - { - "epoch": 0.32923368022705773, - "grad_norm": 2.794201374053955, - "learning_rate": 1.9526120974631763e-05, - "loss": 1.3363, - "step": 435 - }, - { - "epoch": 0.32999053926206245, - "grad_norm": 2.8009607791900635, - "learning_rate": 1.9523675827122305e-05, - "loss": 1.2738, - "step": 436 - }, - { - "epoch": 0.3307473982970672, - "grad_norm": 3.1605050563812256, - "learning_rate": 1.952122454144123e-05, - "loss": 1.3242, - "step": 437 - }, - { - "epoch": 0.3315042573320719, - "grad_norm": 2.7758185863494873, - "learning_rate": 1.9518767119168608e-05, - "loss": 1.2546, - "step": 438 - }, - { - "epoch": 0.3322611163670766, - "grad_norm": 3.3435556888580322, - "learning_rate": 1.9516303561888446e-05, - "loss": 1.2966, - "step": 439 - }, - { - "epoch": 0.3330179754020814, - "grad_norm": 3.4312620162963867, - "learning_rate": 1.9513833871188724e-05, - "loss": 1.328, - "step": 440 - }, - { - "epoch": 0.3337748344370861, - "grad_norm": 3.4291491508483887, - "learning_rate": 1.951135804866136e-05, - "loss": 1.2927, - "step": 441 - }, - { - "epoch": 0.33453169347209083, - "grad_norm": 2.797574281692505, - "learning_rate": 1.9508876095902236e-05, - "loss": 1.3218, - "step": 442 - }, - { - "epoch": 0.33528855250709555, - "grad_norm": 3.1859307289123535, - "learning_rate": 1.9506388014511176e-05, - "loss": 1.2827, - "step": 443 - }, - { - "epoch": 0.33604541154210027, - "grad_norm": 3.4026360511779785, - "learning_rate": 1.950389380609196e-05, - "loss": 1.2879, - "step": 444 - }, - { - "epoch": 0.336802270577105, - "grad_norm": 3.2964580059051514, - "learning_rate": 1.9501393472252324e-05, - "loss": 1.2976, - "step": 445 - }, - { - "epoch": 0.33755912961210977, - "grad_norm": 3.227969169616699, - "learning_rate": 1.9498887014603937e-05, - "loss": 1.3191, - "step": 446 - }, - { - "epoch": 0.3383159886471145, - "grad_norm": 4.118795871734619, - "learning_rate": 1.949637443476243e-05, - "loss": 1.3112, - "step": 447 - }, - { - "epoch": 0.3390728476821192, - "grad_norm": 3.7260451316833496, - "learning_rate": 1.9493855734347367e-05, - "loss": 1.2836, - "step": 448 - }, - { - "epoch": 0.3398297067171239, - "grad_norm": 3.0048820972442627, - "learning_rate": 1.9491330914982265e-05, - "loss": 1.3106, - "step": 449 - }, - { - 
"epoch": 0.34058656575212864, - "grad_norm": 2.7483198642730713, - "learning_rate": 1.9488799978294586e-05, - "loss": 1.338, - "step": 450 - }, - { - "epoch": 0.3413434247871334, - "grad_norm": 3.021895170211792, - "learning_rate": 1.9486262925915736e-05, - "loss": 1.2931, - "step": 451 - }, - { - "epoch": 0.34210028382213814, - "grad_norm": 2.793663740158081, - "learning_rate": 1.948371975948106e-05, - "loss": 1.2895, - "step": 452 - }, - { - "epoch": 0.34285714285714286, - "grad_norm": 3.6046817302703857, - "learning_rate": 1.9481170480629835e-05, - "loss": 1.326, - "step": 453 - }, - { - "epoch": 0.3436140018921476, - "grad_norm": 2.8959131240844727, - "learning_rate": 1.9478615091005296e-05, - "loss": 1.3018, - "step": 454 - }, - { - "epoch": 0.3443708609271523, - "grad_norm": 2.869874954223633, - "learning_rate": 1.9476053592254608e-05, - "loss": 1.3181, - "step": 455 - }, - { - "epoch": 0.3451277199621571, - "grad_norm": 2.9448678493499756, - "learning_rate": 1.947348598602887e-05, - "loss": 1.2992, - "step": 456 - }, - { - "epoch": 0.3458845789971618, - "grad_norm": 2.8842031955718994, - "learning_rate": 1.9470912273983123e-05, - "loss": 1.3297, - "step": 457 - }, - { - "epoch": 0.3466414380321665, - "grad_norm": 3.329968214035034, - "learning_rate": 1.946833245777635e-05, - "loss": 1.3074, - "step": 458 - }, - { - "epoch": 0.34739829706717124, - "grad_norm": 2.8565642833709717, - "learning_rate": 1.9465746539071447e-05, - "loss": 1.3204, - "step": 459 - }, - { - "epoch": 0.34815515610217596, - "grad_norm": 3.0529487133026123, - "learning_rate": 1.946315451953527e-05, - "loss": 1.3249, - "step": 460 - }, - { - "epoch": 0.3489120151371807, - "grad_norm": 2.988011360168457, - "learning_rate": 1.946055640083859e-05, - "loss": 1.2612, - "step": 461 - }, - { - "epoch": 0.34966887417218545, - "grad_norm": 3.3266758918762207, - "learning_rate": 1.945795218465611e-05, - "loss": 1.3283, - "step": 462 - }, - { - "epoch": 0.35042573320719017, - "grad_norm": 3.2849862575531006, - "learning_rate": 1.945534187266648e-05, - "loss": 1.3476, - "step": 463 - }, - { - "epoch": 0.3511825922421949, - "grad_norm": 2.831113576889038, - "learning_rate": 1.945272546655226e-05, - "loss": 1.2726, - "step": 464 - }, - { - "epoch": 0.3519394512771996, - "grad_norm": 3.232224464416504, - "learning_rate": 1.9450102967999946e-05, - "loss": 1.3362, - "step": 465 - }, - { - "epoch": 0.35269631031220433, - "grad_norm": 3.704671621322632, - "learning_rate": 1.944747437869996e-05, - "loss": 1.3011, - "step": 466 - }, - { - "epoch": 0.3534531693472091, - "grad_norm": 2.6540513038635254, - "learning_rate": 1.944483970034665e-05, - "loss": 1.3268, - "step": 467 - }, - { - "epoch": 0.3542100283822138, - "grad_norm": 3.856849431991577, - "learning_rate": 1.944219893463829e-05, - "loss": 1.2762, - "step": 468 - }, - { - "epoch": 0.35496688741721855, - "grad_norm": 2.809225082397461, - "learning_rate": 1.943955208327708e-05, - "loss": 1.2515, - "step": 469 - }, - { - "epoch": 0.35572374645222327, - "grad_norm": 3.271754503250122, - "learning_rate": 1.943689914796914e-05, - "loss": 1.313, - "step": 470 - }, - { - "epoch": 0.356480605487228, - "grad_norm": 2.872096061706543, - "learning_rate": 1.9434240130424504e-05, - "loss": 1.2762, - "step": 471 - }, - { - "epoch": 0.35723746452223276, - "grad_norm": 2.9466817378997803, - "learning_rate": 1.9431575032357147e-05, - "loss": 1.3123, - "step": 472 - }, - { - "epoch": 0.3579943235572375, - "grad_norm": 3.358745813369751, - "learning_rate": 1.9428903855484938e-05, - "loss": 
1.2684, - "step": 473 - }, - { - "epoch": 0.3587511825922422, - "grad_norm": 3.3290534019470215, - "learning_rate": 1.9426226601529685e-05, - "loss": 1.321, - "step": 474 - }, - { - "epoch": 0.3595080416272469, - "grad_norm": 3.1677582263946533, - "learning_rate": 1.9423543272217103e-05, - "loss": 1.2994, - "step": 475 - }, - { - "epoch": 0.36026490066225164, - "grad_norm": 3.891291618347168, - "learning_rate": 1.9420853869276822e-05, - "loss": 1.2783, - "step": 476 - }, - { - "epoch": 0.36102175969725636, - "grad_norm": 3.3545546531677246, - "learning_rate": 1.9418158394442395e-05, - "loss": 1.2985, - "step": 477 - }, - { - "epoch": 0.36177861873226114, - "grad_norm": 3.187551498413086, - "learning_rate": 1.941545684945128e-05, - "loss": 1.3401, - "step": 478 - }, - { - "epoch": 0.36253547776726586, - "grad_norm": 3.063565969467163, - "learning_rate": 1.9412749236044855e-05, - "loss": 1.2574, - "step": 479 - }, - { - "epoch": 0.3632923368022706, - "grad_norm": 3.0356266498565674, - "learning_rate": 1.9410035555968403e-05, - "loss": 1.2734, - "step": 480 - }, - { - "epoch": 0.3640491958372753, - "grad_norm": 4.256435871124268, - "learning_rate": 1.9407315810971123e-05, - "loss": 1.2623, - "step": 481 - }, - { - "epoch": 0.36480605487228, - "grad_norm": 3.298546075820923, - "learning_rate": 1.9404590002806122e-05, - "loss": 1.3079, - "step": 482 - }, - { - "epoch": 0.3655629139072848, - "grad_norm": 3.06703782081604, - "learning_rate": 1.9401858133230412e-05, - "loss": 1.347, - "step": 483 - }, - { - "epoch": 0.3663197729422895, - "grad_norm": 3.244100332260132, - "learning_rate": 1.9399120204004917e-05, - "loss": 1.298, - "step": 484 - }, - { - "epoch": 0.36707663197729423, - "grad_norm": 2.7238996028900146, - "learning_rate": 1.9396376216894462e-05, - "loss": 1.2434, - "step": 485 - }, - { - "epoch": 0.36783349101229895, - "grad_norm": 3.0345072746276855, - "learning_rate": 1.939362617366778e-05, - "loss": 1.3634, - "step": 486 - }, - { - "epoch": 0.36859035004730367, - "grad_norm": 3.2847676277160645, - "learning_rate": 1.9390870076097507e-05, - "loss": 1.3037, - "step": 487 - }, - { - "epoch": 0.36934720908230845, - "grad_norm": 2.8988196849823, - "learning_rate": 1.9388107925960183e-05, - "loss": 1.3137, - "step": 488 - }, - { - "epoch": 0.37010406811731317, - "grad_norm": 3.4510462284088135, - "learning_rate": 1.9385339725036244e-05, - "loss": 1.3042, - "step": 489 - }, - { - "epoch": 0.3708609271523179, - "grad_norm": 2.7349674701690674, - "learning_rate": 1.938256547511003e-05, - "loss": 1.3104, - "step": 490 - }, - { - "epoch": 0.3716177861873226, - "grad_norm": 2.6827545166015625, - "learning_rate": 1.9379785177969787e-05, - "loss": 1.3312, - "step": 491 - }, - { - "epoch": 0.3723746452223273, - "grad_norm": 2.871415138244629, - "learning_rate": 1.937699883540765e-05, - "loss": 1.2999, - "step": 492 - }, - { - "epoch": 0.37313150425733205, - "grad_norm": 3.261521339416504, - "learning_rate": 1.9374206449219646e-05, - "loss": 1.3027, - "step": 493 - }, - { - "epoch": 0.3738883632923368, - "grad_norm": 2.7188801765441895, - "learning_rate": 1.9371408021205708e-05, - "loss": 1.2688, - "step": 494 - }, - { - "epoch": 0.37464522232734154, - "grad_norm": 2.762587070465088, - "learning_rate": 1.936860355316967e-05, - "loss": 1.316, - "step": 495 - }, - { - "epoch": 0.37540208136234626, - "grad_norm": 3.2157773971557617, - "learning_rate": 1.9365793046919233e-05, - "loss": 1.2818, - "step": 496 - }, - { - "epoch": 0.376158940397351, - "grad_norm": 2.720599889755249, - "learning_rate": 
1.9362976504266017e-05, - "loss": 1.2767, - "step": 497 - }, - { - "epoch": 0.3769157994323557, - "grad_norm": 2.728111982345581, - "learning_rate": 1.936015392702552e-05, - "loss": 1.3292, - "step": 498 - }, - { - "epoch": 0.3776726584673605, - "grad_norm": 3.2255611419677734, - "learning_rate": 1.9357325317017127e-05, - "loss": 1.3165, - "step": 499 - }, - { - "epoch": 0.3784295175023652, - "grad_norm": 3.559377431869507, - "learning_rate": 1.935449067606413e-05, - "loss": 1.3284, - "step": 500 - }, - { - "epoch": 0.3791863765373699, - "grad_norm": 3.0584166049957275, - "learning_rate": 1.935165000599368e-05, - "loss": 1.2788, - "step": 501 - }, - { - "epoch": 0.37994323557237464, - "grad_norm": 3.422832727432251, - "learning_rate": 1.9348803308636836e-05, - "loss": 1.3315, - "step": 502 - }, - { - "epoch": 0.38070009460737936, - "grad_norm": 3.052250623703003, - "learning_rate": 1.9345950585828543e-05, - "loss": 1.2772, - "step": 503 - }, - { - "epoch": 0.38145695364238413, - "grad_norm": 3.164451837539673, - "learning_rate": 1.9343091839407608e-05, - "loss": 1.2796, - "step": 504 - }, - { - "epoch": 0.38221381267738885, - "grad_norm": 2.815291166305542, - "learning_rate": 1.9340227071216747e-05, - "loss": 1.2473, - "step": 505 - }, - { - "epoch": 0.3829706717123936, - "grad_norm": 2.953880548477173, - "learning_rate": 1.9337356283102543e-05, - "loss": 1.299, - "step": 506 - }, - { - "epoch": 0.3837275307473983, - "grad_norm": 2.8606905937194824, - "learning_rate": 1.9334479476915462e-05, - "loss": 1.3075, - "step": 507 - }, - { - "epoch": 0.384484389782403, - "grad_norm": 2.8410329818725586, - "learning_rate": 1.9331596654509848e-05, - "loss": 1.3377, - "step": 508 - }, - { - "epoch": 0.38524124881740773, - "grad_norm": 3.284508228302002, - "learning_rate": 1.9328707817743923e-05, - "loss": 1.2549, - "step": 509 - }, - { - "epoch": 0.3859981078524125, - "grad_norm": 2.892754554748535, - "learning_rate": 1.9325812968479793e-05, - "loss": 1.292, - "step": 510 - }, - { - "epoch": 0.38675496688741723, - "grad_norm": 3.1711387634277344, - "learning_rate": 1.932291210858343e-05, - "loss": 1.2258, - "step": 511 - }, - { - "epoch": 0.38751182592242195, - "grad_norm": 2.6686558723449707, - "learning_rate": 1.932000523992468e-05, - "loss": 1.246, - "step": 512 - }, - { - "epoch": 0.38826868495742667, - "grad_norm": 2.947592258453369, - "learning_rate": 1.9317092364377273e-05, - "loss": 1.2544, - "step": 513 - }, - { - "epoch": 0.3890255439924314, - "grad_norm": 3.5232532024383545, - "learning_rate": 1.93141734838188e-05, - "loss": 1.3041, - "step": 514 - }, - { - "epoch": 0.38978240302743616, - "grad_norm": 3.5411503314971924, - "learning_rate": 1.931124860013073e-05, - "loss": 1.2852, - "step": 515 - }, - { - "epoch": 0.3905392620624409, - "grad_norm": 3.0337717533111572, - "learning_rate": 1.93083177151984e-05, - "loss": 1.2867, - "step": 516 - }, - { - "epoch": 0.3912961210974456, - "grad_norm": 3.082791805267334, - "learning_rate": 1.9305380830911002e-05, - "loss": 1.2981, - "step": 517 - }, - { - "epoch": 0.3920529801324503, - "grad_norm": 3.1100258827209473, - "learning_rate": 1.9302437949161622e-05, - "loss": 1.2645, - "step": 518 - }, - { - "epoch": 0.39280983916745504, - "grad_norm": 3.3867480754852295, - "learning_rate": 1.9299489071847185e-05, - "loss": 1.3555, - "step": 519 - }, - { - "epoch": 0.3935666982024598, - "grad_norm": 3.1625607013702393, - "learning_rate": 1.9296534200868504e-05, - "loss": 1.3111, - "step": 520 - }, - { - "epoch": 0.39432355723746454, - "grad_norm": 
4.334456443786621, - "learning_rate": 1.929357333813023e-05, - "loss": 1.3102, - "step": 521 - }, - { - "epoch": 0.39508041627246926, - "grad_norm": 3.650447130203247, - "learning_rate": 1.9290606485540903e-05, - "loss": 1.3129, - "step": 522 - }, - { - "epoch": 0.395837275307474, - "grad_norm": 3.8689398765563965, - "learning_rate": 1.9287633645012898e-05, - "loss": 1.2974, - "step": 523 - }, - { - "epoch": 0.3965941343424787, - "grad_norm": 3.520089864730835, - "learning_rate": 1.9284654818462474e-05, - "loss": 1.291, - "step": 524 - }, - { - "epoch": 0.3973509933774834, - "grad_norm": 4.220740795135498, - "learning_rate": 1.9281670007809735e-05, - "loss": 1.3039, - "step": 525 - }, - { - "epoch": 0.3981078524124882, - "grad_norm": 3.871176242828369, - "learning_rate": 1.9278679214978637e-05, - "loss": 1.2682, - "step": 526 - }, - { - "epoch": 0.3988647114474929, - "grad_norm": 3.2446093559265137, - "learning_rate": 1.9275682441897007e-05, - "loss": 1.2866, - "step": 527 - }, - { - "epoch": 0.39962157048249763, - "grad_norm": 3.475529432296753, - "learning_rate": 1.9272679690496517e-05, - "loss": 1.344, - "step": 528 - }, - { - "epoch": 0.40037842951750235, - "grad_norm": 3.29640531539917, - "learning_rate": 1.9269670962712695e-05, - "loss": 1.3257, - "step": 529 - }, - { - "epoch": 0.4011352885525071, - "grad_norm": 3.43729305267334, - "learning_rate": 1.9266656260484925e-05, - "loss": 1.3504, - "step": 530 - }, - { - "epoch": 0.40189214758751185, - "grad_norm": 3.6663601398468018, - "learning_rate": 1.9263635585756424e-05, - "loss": 1.2738, - "step": 531 - }, - { - "epoch": 0.40264900662251657, - "grad_norm": 3.4716086387634277, - "learning_rate": 1.9260608940474293e-05, - "loss": 1.2997, - "step": 532 - }, - { - "epoch": 0.4034058656575213, - "grad_norm": 3.0576701164245605, - "learning_rate": 1.9257576326589448e-05, - "loss": 1.2958, - "step": 533 - }, - { - "epoch": 0.404162724692526, - "grad_norm": 3.7031450271606445, - "learning_rate": 1.9254537746056664e-05, - "loss": 1.2537, - "step": 534 - }, - { - "epoch": 0.40491958372753073, - "grad_norm": 3.070580005645752, - "learning_rate": 1.925149320083457e-05, - "loss": 1.3362, - "step": 535 - }, - { - "epoch": 0.4056764427625355, - "grad_norm": 3.241197347640991, - "learning_rate": 1.9248442692885634e-05, - "loss": 1.2984, - "step": 536 - }, - { - "epoch": 0.4064333017975402, - "grad_norm": 2.7833101749420166, - "learning_rate": 1.9245386224176162e-05, - "loss": 1.2589, - "step": 537 - }, - { - "epoch": 0.40719016083254495, - "grad_norm": 2.8053226470947266, - "learning_rate": 1.9242323796676313e-05, - "loss": 1.277, - "step": 538 - }, - { - "epoch": 0.40794701986754967, - "grad_norm": 3.119124412536621, - "learning_rate": 1.9239255412360075e-05, - "loss": 1.2516, - "step": 539 - }, - { - "epoch": 0.4087038789025544, - "grad_norm": 3.013762950897217, - "learning_rate": 1.923618107320529e-05, - "loss": 1.2988, - "step": 540 - }, - { - "epoch": 0.4094607379375591, - "grad_norm": 2.8327529430389404, - "learning_rate": 1.923310078119362e-05, - "loss": 1.2596, - "step": 541 - }, - { - "epoch": 0.4102175969725639, - "grad_norm": 2.7732462882995605, - "learning_rate": 1.9230014538310575e-05, - "loss": 1.2525, - "step": 542 - }, - { - "epoch": 0.4109744560075686, - "grad_norm": 2.984377145767212, - "learning_rate": 1.9226922346545513e-05, - "loss": 1.2688, - "step": 543 - }, - { - "epoch": 0.4117313150425733, - "grad_norm": 3.146101474761963, - "learning_rate": 1.92238242078916e-05, - "loss": 1.3291, - "step": 544 - }, - { - "epoch": 
0.41248817407757804, - "grad_norm": 2.911142587661743, - "learning_rate": 1.9220720124345855e-05, - "loss": 1.2372, - "step": 545 - }, - { - "epoch": 0.41324503311258276, - "grad_norm": 3.006364345550537, - "learning_rate": 1.921761009790912e-05, - "loss": 1.2157, - "step": 546 - }, - { - "epoch": 0.41400189214758754, - "grad_norm": 2.9054133892059326, - "learning_rate": 1.9214494130586074e-05, - "loss": 1.3591, - "step": 547 - }, - { - "epoch": 0.41475875118259226, - "grad_norm": 2.9922358989715576, - "learning_rate": 1.9211372224385222e-05, - "loss": 1.3093, - "step": 548 - }, - { - "epoch": 0.415515610217597, - "grad_norm": 2.6461005210876465, - "learning_rate": 1.9208244381318892e-05, - "loss": 1.2585, - "step": 549 - }, - { - "epoch": 0.4162724692526017, - "grad_norm": 2.7143542766571045, - "learning_rate": 1.9205110603403247e-05, - "loss": 1.2594, - "step": 550 - }, - { - "epoch": 0.4170293282876064, - "grad_norm": 2.9333744049072266, - "learning_rate": 1.9201970892658273e-05, - "loss": 1.3178, - "step": 551 - }, - { - "epoch": 0.4177861873226112, - "grad_norm": 2.956841230392456, - "learning_rate": 1.919882525110778e-05, - "loss": 1.2745, - "step": 552 - }, - { - "epoch": 0.4185430463576159, - "grad_norm": 3.0672903060913086, - "learning_rate": 1.91956736807794e-05, - "loss": 1.2648, - "step": 553 - }, - { - "epoch": 0.41929990539262063, - "grad_norm": 2.7969796657562256, - "learning_rate": 1.9192516183704587e-05, - "loss": 1.3154, - "step": 554 - }, - { - "epoch": 0.42005676442762535, - "grad_norm": 2.9009835720062256, - "learning_rate": 1.9189352761918616e-05, - "loss": 1.2412, - "step": 555 - }, - { - "epoch": 0.42081362346263007, - "grad_norm": 2.8731672763824463, - "learning_rate": 1.918618341746058e-05, - "loss": 1.2811, - "step": 556 - }, - { - "epoch": 0.4215704824976348, - "grad_norm": 2.7065563201904297, - "learning_rate": 1.918300815237339e-05, - "loss": 1.2895, - "step": 557 - }, - { - "epoch": 0.42232734153263957, - "grad_norm": 2.670109748840332, - "learning_rate": 1.9179826968703775e-05, - "loss": 1.2809, - "step": 558 - }, - { - "epoch": 0.4230842005676443, - "grad_norm": 2.9249067306518555, - "learning_rate": 1.9176639868502273e-05, - "loss": 1.3528, - "step": 559 - }, - { - "epoch": 0.423841059602649, - "grad_norm": 2.733651638031006, - "learning_rate": 1.917344685382325e-05, - "loss": 1.2516, - "step": 560 - }, - { - "epoch": 0.4245979186376537, - "grad_norm": 3.126077651977539, - "learning_rate": 1.9170247926724863e-05, - "loss": 1.3048, - "step": 561 - }, - { - "epoch": 0.42535477767265845, - "grad_norm": 3.024705648422241, - "learning_rate": 1.9167043089269096e-05, - "loss": 1.2871, - "step": 562 - }, - { - "epoch": 0.4261116367076632, - "grad_norm": 3.0809972286224365, - "learning_rate": 1.916383234352174e-05, - "loss": 1.2939, - "step": 563 - }, - { - "epoch": 0.42686849574266794, - "grad_norm": 2.8006155490875244, - "learning_rate": 1.9160615691552388e-05, - "loss": 1.2681, - "step": 564 - }, - { - "epoch": 0.42762535477767266, - "grad_norm": 3.146348714828491, - "learning_rate": 1.915739313543445e-05, - "loss": 1.299, - "step": 565 - }, - { - "epoch": 0.4283822138126774, - "grad_norm": 2.707672119140625, - "learning_rate": 1.915416467724514e-05, - "loss": 1.305, - "step": 566 - }, - { - "epoch": 0.4291390728476821, - "grad_norm": 3.0839362144470215, - "learning_rate": 1.9150930319065465e-05, - "loss": 1.2806, - "step": 567 - }, - { - "epoch": 0.4298959318826869, - "grad_norm": 2.6987831592559814, - "learning_rate": 1.9147690062980243e-05, - "loss": 1.2449, 
- "step": 568 - }, - { - "epoch": 0.4306527909176916, - "grad_norm": 3.5137927532196045, - "learning_rate": 1.9144443911078098e-05, - "loss": 1.2525, - "step": 569 - }, - { - "epoch": 0.4314096499526963, - "grad_norm": 2.656526803970337, - "learning_rate": 1.914119186545145e-05, - "loss": 1.2801, - "step": 570 - }, - { - "epoch": 0.43216650898770104, - "grad_norm": 2.7091798782348633, - "learning_rate": 1.9137933928196514e-05, - "loss": 1.2743, - "step": 571 - }, - { - "epoch": 0.43292336802270576, - "grad_norm": 2.6860084533691406, - "learning_rate": 1.913467010141331e-05, - "loss": 1.2569, - "step": 572 - }, - { - "epoch": 0.4336802270577105, - "grad_norm": 2.8987984657287598, - "learning_rate": 1.9131400387205653e-05, - "loss": 1.2411, - "step": 573 - }, - { - "epoch": 0.43443708609271525, - "grad_norm": 2.579749584197998, - "learning_rate": 1.9128124787681145e-05, - "loss": 1.2344, - "step": 574 - }, - { - "epoch": 0.43519394512771997, - "grad_norm": 2.835766553878784, - "learning_rate": 1.912484330495119e-05, - "loss": 1.2922, - "step": 575 - }, - { - "epoch": 0.4359508041627247, - "grad_norm": 3.549691915512085, - "learning_rate": 1.9121555941130986e-05, - "loss": 1.2908, - "step": 576 - }, - { - "epoch": 0.4367076631977294, - "grad_norm": 2.881730556488037, - "learning_rate": 1.911826269833951e-05, - "loss": 1.2787, - "step": 577 - }, - { - "epoch": 0.43746452223273413, - "grad_norm": 2.881334066390991, - "learning_rate": 1.9114963578699538e-05, - "loss": 1.3111, - "step": 578 - }, - { - "epoch": 0.4382213812677389, - "grad_norm": 2.941556453704834, - "learning_rate": 1.911165858433764e-05, - "loss": 1.2857, - "step": 579 - }, - { - "epoch": 0.4389782403027436, - "grad_norm": 2.6916472911834717, - "learning_rate": 1.9108347717384156e-05, - "loss": 1.2512, - "step": 580 - }, - { - "epoch": 0.43973509933774835, - "grad_norm": 3.0234310626983643, - "learning_rate": 1.9105030979973223e-05, - "loss": 1.2089, - "step": 581 - }, - { - "epoch": 0.44049195837275307, - "grad_norm": 2.7675161361694336, - "learning_rate": 1.9101708374242764e-05, - "loss": 1.3253, - "step": 582 - }, - { - "epoch": 0.4412488174077578, - "grad_norm": 2.746612310409546, - "learning_rate": 1.909837990233447e-05, - "loss": 1.2554, - "step": 583 - }, - { - "epoch": 0.44200567644276256, - "grad_norm": 2.629913091659546, - "learning_rate": 1.9095045566393834e-05, - "loss": 1.3158, - "step": 584 - }, - { - "epoch": 0.4427625354777673, - "grad_norm": 3.0382394790649414, - "learning_rate": 1.909170536857011e-05, - "loss": 1.3382, - "step": 585 - }, - { - "epoch": 0.443519394512772, - "grad_norm": 3.1332645416259766, - "learning_rate": 1.908835931101634e-05, - "loss": 1.2561, - "step": 586 - }, - { - "epoch": 0.4442762535477767, - "grad_norm": 2.91369891166687, - "learning_rate": 1.9085007395889342e-05, - "loss": 1.287, - "step": 587 - }, - { - "epoch": 0.44503311258278144, - "grad_norm": 2.6690382957458496, - "learning_rate": 1.9081649625349715e-05, - "loss": 1.275, - "step": 588 - }, - { - "epoch": 0.44578997161778616, - "grad_norm": 2.7576904296875, - "learning_rate": 1.9078286001561822e-05, - "loss": 1.2669, - "step": 589 - }, - { - "epoch": 0.44654683065279094, - "grad_norm": 2.731320381164551, - "learning_rate": 1.9074916526693804e-05, - "loss": 1.292, - "step": 590 - }, - { - "epoch": 0.44730368968779566, - "grad_norm": 2.6240909099578857, - "learning_rate": 1.9071541202917572e-05, - "loss": 1.2852, - "step": 591 - }, - { - "epoch": 0.4480605487228004, - "grad_norm": 2.8189620971679688, - "learning_rate": 
1.906816003240881e-05, - "loss": 1.2655, - "step": 592 - }, - { - "epoch": 0.4488174077578051, - "grad_norm": 2.7323951721191406, - "learning_rate": 1.906477301734697e-05, - "loss": 1.2942, - "step": 593 - }, - { - "epoch": 0.4495742667928098, - "grad_norm": 2.8606555461883545, - "learning_rate": 1.9061380159915262e-05, - "loss": 1.3039, - "step": 594 - }, - { - "epoch": 0.4503311258278146, - "grad_norm": 2.7523887157440186, - "learning_rate": 1.9057981462300683e-05, - "loss": 1.2372, - "step": 595 - }, - { - "epoch": 0.4510879848628193, - "grad_norm": 3.1251001358032227, - "learning_rate": 1.9054576926693977e-05, - "loss": 1.2726, - "step": 596 - }, - { - "epoch": 0.45184484389782403, - "grad_norm": 3.1092488765716553, - "learning_rate": 1.9051166555289652e-05, - "loss": 1.3126, - "step": 597 - }, - { - "epoch": 0.45260170293282875, - "grad_norm": 2.722238302230835, - "learning_rate": 1.904775035028598e-05, - "loss": 1.2765, - "step": 598 - }, - { - "epoch": 0.4533585619678335, - "grad_norm": 3.9474592208862305, - "learning_rate": 1.9044328313885e-05, - "loss": 1.2389, - "step": 599 - }, - { - "epoch": 0.45411542100283825, - "grad_norm": 2.7783472537994385, - "learning_rate": 1.90409004482925e-05, - "loss": 1.2683, - "step": 600 - }, - { - "epoch": 0.45487228003784297, - "grad_norm": 2.7635014057159424, - "learning_rate": 1.9037466755718038e-05, - "loss": 1.3073, - "step": 601 - }, - { - "epoch": 0.4556291390728477, - "grad_norm": 2.899637222290039, - "learning_rate": 1.903402723837491e-05, - "loss": 1.2682, - "step": 602 - }, - { - "epoch": 0.4563859981078524, - "grad_norm": 2.5725064277648926, - "learning_rate": 1.9030581898480182e-05, - "loss": 1.2445, - "step": 603 - }, - { - "epoch": 0.45714285714285713, - "grad_norm": 2.767765760421753, - "learning_rate": 1.902713073825467e-05, - "loss": 1.3006, - "step": 604 - }, - { - "epoch": 0.45789971617786185, - "grad_norm": 2.7437305450439453, - "learning_rate": 1.902367375992293e-05, - "loss": 1.256, - "step": 605 - }, - { - "epoch": 0.4586565752128666, - "grad_norm": 2.764497756958008, - "learning_rate": 1.9020210965713287e-05, - "loss": 1.2316, - "step": 606 - }, - { - "epoch": 0.45941343424787134, - "grad_norm": 2.6510708332061768, - "learning_rate": 1.9016742357857802e-05, - "loss": 1.2413, - "step": 607 - }, - { - "epoch": 0.46017029328287606, - "grad_norm": 2.727973699569702, - "learning_rate": 1.9013267938592282e-05, - "loss": 1.2779, - "step": 608 - }, - { - "epoch": 0.4609271523178808, - "grad_norm": 2.7336103916168213, - "learning_rate": 1.900978771015629e-05, - "loss": 1.3133, - "step": 609 - }, - { - "epoch": 0.4616840113528855, - "grad_norm": 2.635427713394165, - "learning_rate": 1.9006301674793128e-05, - "loss": 1.233, - "step": 610 - }, - { - "epoch": 0.4624408703878903, - "grad_norm": 2.99351167678833, - "learning_rate": 1.900280983474984e-05, - "loss": 1.2353, - "step": 611 - }, - { - "epoch": 0.463197729422895, - "grad_norm": 3.155054807662964, - "learning_rate": 1.8999312192277217e-05, - "loss": 1.3258, - "step": 612 - }, - { - "epoch": 0.4639545884578997, - "grad_norm": 2.745626926422119, - "learning_rate": 1.8995808749629773e-05, - "loss": 1.2321, - "step": 613 - }, - { - "epoch": 0.46471144749290444, - "grad_norm": 2.662928819656372, - "learning_rate": 1.899229950906579e-05, - "loss": 1.2291, - "step": 614 - }, - { - "epoch": 0.46546830652790916, - "grad_norm": 2.684296131134033, - "learning_rate": 1.8988784472847262e-05, - "loss": 1.2575, - "step": 615 - }, - { - "epoch": 0.46622516556291393, - "grad_norm": 
2.850404977798462, - "learning_rate": 1.8985263643239932e-05, - "loss": 1.2727, - "step": 616 - }, - { - "epoch": 0.46698202459791865, - "grad_norm": 2.8185768127441406, - "learning_rate": 1.8981737022513268e-05, - "loss": 1.2145, - "step": 617 - }, - { - "epoch": 0.4677388836329234, - "grad_norm": 2.865675449371338, - "learning_rate": 1.8978204612940476e-05, - "loss": 1.2602, - "step": 618 - }, - { - "epoch": 0.4684957426679281, - "grad_norm": 2.706779718399048, - "learning_rate": 1.8974666416798496e-05, - "loss": 1.2578, - "step": 619 - }, - { - "epoch": 0.4692526017029328, - "grad_norm": 2.7865641117095947, - "learning_rate": 1.8971122436368002e-05, - "loss": 1.2549, - "step": 620 - }, - { - "epoch": 0.47000946073793753, - "grad_norm": 3.0289227962493896, - "learning_rate": 1.8967572673933373e-05, - "loss": 1.2794, - "step": 621 - }, - { - "epoch": 0.4707663197729423, - "grad_norm": 2.986976146697998, - "learning_rate": 1.8964017131782748e-05, - "loss": 1.2666, - "step": 622 - }, - { - "epoch": 0.47152317880794703, - "grad_norm": 2.907590866088867, - "learning_rate": 1.896045581220797e-05, - "loss": 1.3149, - "step": 623 - }, - { - "epoch": 0.47228003784295175, - "grad_norm": 2.5124711990356445, - "learning_rate": 1.8956888717504607e-05, - "loss": 1.2692, - "step": 624 - }, - { - "epoch": 0.47303689687795647, - "grad_norm": 2.8450794219970703, - "learning_rate": 1.8953315849971956e-05, - "loss": 1.2385, - "step": 625 - }, - { - "epoch": 0.4737937559129612, - "grad_norm": 3.127713441848755, - "learning_rate": 1.8949737211913038e-05, - "loss": 1.2725, - "step": 626 - }, - { - "epoch": 0.47455061494796597, - "grad_norm": 3.0674550533294678, - "learning_rate": 1.894615280563458e-05, - "loss": 1.3016, - "step": 627 - }, - { - "epoch": 0.4753074739829707, - "grad_norm": 3.29008150100708, - "learning_rate": 1.894256263344704e-05, - "loss": 1.2382, - "step": 628 - }, - { - "epoch": 0.4760643330179754, - "grad_norm": 3.2081003189086914, - "learning_rate": 1.8938966697664592e-05, - "loss": 1.259, - "step": 629 - }, - { - "epoch": 0.4768211920529801, - "grad_norm": 2.922011613845825, - "learning_rate": 1.8935365000605116e-05, - "loss": 1.3017, - "step": 630 - }, - { - "epoch": 0.47757805108798485, - "grad_norm": 3.075958490371704, - "learning_rate": 1.893175754459021e-05, - "loss": 1.2595, - "step": 631 - }, - { - "epoch": 0.4783349101229896, - "grad_norm": 2.9022579193115234, - "learning_rate": 1.892814433194519e-05, - "loss": 1.3033, - "step": 632 - }, - { - "epoch": 0.47909176915799434, - "grad_norm": 2.9433717727661133, - "learning_rate": 1.8924525364999077e-05, - "loss": 1.2636, - "step": 633 - }, - { - "epoch": 0.47984862819299906, - "grad_norm": 2.9550983905792236, - "learning_rate": 1.89209006460846e-05, - "loss": 1.2936, - "step": 634 - }, - { - "epoch": 0.4806054872280038, - "grad_norm": 2.8603897094726562, - "learning_rate": 1.8917270177538198e-05, - "loss": 1.2497, - "step": 635 - }, - { - "epoch": 0.4813623462630085, - "grad_norm": 3.0159318447113037, - "learning_rate": 1.8913633961700014e-05, - "loss": 1.2627, - "step": 636 - }, - { - "epoch": 0.4821192052980132, - "grad_norm": 3.3943378925323486, - "learning_rate": 1.8909992000913896e-05, - "loss": 1.2977, - "step": 637 - }, - { - "epoch": 0.482876064333018, - "grad_norm": 2.8387339115142822, - "learning_rate": 1.8906344297527403e-05, - "loss": 1.2922, - "step": 638 - }, - { - "epoch": 0.4836329233680227, - "grad_norm": 2.8385610580444336, - "learning_rate": 1.8902690853891787e-05, - "loss": 1.2023, - "step": 639 - }, - { - "epoch": 
0.48438978240302744, - "grad_norm": 3.155811309814453, - "learning_rate": 1.8899031672362e-05, - "loss": 1.3069, - "step": 640 - }, - { - "epoch": 0.48514664143803216, - "grad_norm": 3.442098617553711, - "learning_rate": 1.8895366755296693e-05, - "loss": 1.2361, - "step": 641 - }, - { - "epoch": 0.4859035004730369, - "grad_norm": 2.805680751800537, - "learning_rate": 1.8891696105058218e-05, - "loss": 1.2349, - "step": 642 - }, - { - "epoch": 0.48666035950804165, - "grad_norm": 2.7870709896087646, - "learning_rate": 1.8888019724012618e-05, - "loss": 1.3326, - "step": 643 - }, - { - "epoch": 0.48741721854304637, - "grad_norm": 2.8645455837249756, - "learning_rate": 1.8884337614529636e-05, - "loss": 1.2829, - "step": 644 - }, - { - "epoch": 0.4881740775780511, - "grad_norm": 2.8770759105682373, - "learning_rate": 1.88806497789827e-05, - "loss": 1.2268, - "step": 645 - }, - { - "epoch": 0.4889309366130558, - "grad_norm": 2.8018059730529785, - "learning_rate": 1.8876956219748934e-05, - "loss": 1.2566, - "step": 646 - }, - { - "epoch": 0.48968779564806053, - "grad_norm": 3.0624117851257324, - "learning_rate": 1.887325693920915e-05, - "loss": 1.2776, - "step": 647 - }, - { - "epoch": 0.4904446546830653, - "grad_norm": 2.7411904335021973, - "learning_rate": 1.886955193974785e-05, - "loss": 1.2941, - "step": 648 - }, - { - "epoch": 0.49120151371807, - "grad_norm": 2.4694104194641113, - "learning_rate": 1.8865841223753216e-05, - "loss": 1.245, - "step": 649 - }, - { - "epoch": 0.49195837275307475, - "grad_norm": 2.4889931678771973, - "learning_rate": 1.886212479361712e-05, - "loss": 1.2664, - "step": 650 - }, - { - "epoch": 0.49271523178807947, - "grad_norm": 2.699221134185791, - "learning_rate": 1.885840265173512e-05, - "loss": 1.245, - "step": 651 - }, - { - "epoch": 0.4934720908230842, - "grad_norm": 3.0901527404785156, - "learning_rate": 1.8854674800506447e-05, - "loss": 1.2683, - "step": 652 - }, - { - "epoch": 0.4942289498580889, - "grad_norm": 2.5710549354553223, - "learning_rate": 1.8850941242334024e-05, - "loss": 1.2677, - "step": 653 - }, - { - "epoch": 0.4949858088930937, - "grad_norm": 2.747673988342285, - "learning_rate": 1.8847201979624433e-05, - "loss": 1.2487, - "step": 654 - }, - { - "epoch": 0.4957426679280984, - "grad_norm": 2.6453075408935547, - "learning_rate": 1.8843457014787954e-05, - "loss": 1.2534, - "step": 655 - }, - { - "epoch": 0.4964995269631031, - "grad_norm": 2.3280134201049805, - "learning_rate": 1.8839706350238537e-05, - "loss": 1.2529, - "step": 656 - }, - { - "epoch": 0.49725638599810784, - "grad_norm": 2.353527307510376, - "learning_rate": 1.88359499883938e-05, - "loss": 1.2612, - "step": 657 - }, - { - "epoch": 0.49801324503311256, - "grad_norm": 2.827341318130493, - "learning_rate": 1.8832187931675036e-05, - "loss": 1.2883, - "step": 658 - }, - { - "epoch": 0.49877010406811734, - "grad_norm": 2.620957374572754, - "learning_rate": 1.882842018250721e-05, - "loss": 1.25, - "step": 659 - }, - { - "epoch": 0.49952696310312206, - "grad_norm": 2.600372076034546, - "learning_rate": 1.8824646743318955e-05, - "loss": 1.2497, - "step": 660 - }, - { - "epoch": 0.5002838221381267, - "grad_norm": 2.544832706451416, - "learning_rate": 1.882086761654257e-05, - "loss": 1.2656, - "step": 661 - }, - { - "epoch": 0.5010406811731315, - "grad_norm": 2.809065818786621, - "learning_rate": 1.881708280461403e-05, - "loss": 1.3098, - "step": 662 - }, - { - "epoch": 0.5017975402081363, - "grad_norm": 2.423124313354492, - "learning_rate": 1.881329230997296e-05, - "loss": 1.2676, - "step": 
663 - }, - { - "epoch": 0.5025543992431409, - "grad_norm": 2.6886796951293945, - "learning_rate": 1.880949613506266e-05, - "loss": 1.2764, - "step": 664 - }, - { - "epoch": 0.5033112582781457, - "grad_norm": 2.9043877124786377, - "learning_rate": 1.8805694282330076e-05, - "loss": 1.2499, - "step": 665 - }, - { - "epoch": 0.5040681173131504, - "grad_norm": 2.5381906032562256, - "learning_rate": 1.880188675422584e-05, - "loss": 1.2429, - "step": 666 - }, - { - "epoch": 0.5048249763481552, - "grad_norm": 2.5368845462799072, - "learning_rate": 1.8798073553204216e-05, - "loss": 1.2992, - "step": 667 - }, - { - "epoch": 0.5055818353831599, - "grad_norm": 2.313969850540161, - "learning_rate": 1.879425468172314e-05, - "loss": 1.2602, - "step": 668 - }, - { - "epoch": 0.5063386944181646, - "grad_norm": 2.473052978515625, - "learning_rate": 1.8790430142244192e-05, - "loss": 1.2558, - "step": 669 - }, - { - "epoch": 0.5070955534531694, - "grad_norm": 2.5860140323638916, - "learning_rate": 1.878659993723262e-05, - "loss": 1.2489, - "step": 670 - }, - { - "epoch": 0.507852412488174, - "grad_norm": 2.7334864139556885, - "learning_rate": 1.8782764069157307e-05, - "loss": 1.2892, - "step": 671 - }, - { - "epoch": 0.5086092715231788, - "grad_norm": 2.7741503715515137, - "learning_rate": 1.8778922540490803e-05, - "loss": 1.214, - "step": 672 - }, - { - "epoch": 0.5093661305581836, - "grad_norm": 2.3246145248413086, - "learning_rate": 1.8775075353709294e-05, - "loss": 1.2301, - "step": 673 - }, - { - "epoch": 0.5101229895931882, - "grad_norm": 2.879974365234375, - "learning_rate": 1.8771222511292622e-05, - "loss": 1.2351, - "step": 674 - }, - { - "epoch": 0.510879848628193, - "grad_norm": 2.5754384994506836, - "learning_rate": 1.8767364015724266e-05, - "loss": 1.2701, - "step": 675 - }, - { - "epoch": 0.5116367076631977, - "grad_norm": 2.623716115951538, - "learning_rate": 1.8763499869491356e-05, - "loss": 1.2934, - "step": 676 - }, - { - "epoch": 0.5123935666982025, - "grad_norm": 2.6354804039001465, - "learning_rate": 1.8759630075084664e-05, - "loss": 1.2454, - "step": 677 - }, - { - "epoch": 0.5131504257332072, - "grad_norm": 2.550604820251465, - "learning_rate": 1.8755754634998593e-05, - "loss": 1.2555, - "step": 678 - }, - { - "epoch": 0.5139072847682119, - "grad_norm": 2.5519111156463623, - "learning_rate": 1.8751873551731196e-05, - "loss": 1.2384, - "step": 679 - }, - { - "epoch": 0.5146641438032167, - "grad_norm": 2.6348938941955566, - "learning_rate": 1.8747986827784167e-05, - "loss": 1.2453, - "step": 680 - }, - { - "epoch": 0.5154210028382213, - "grad_norm": 2.5110082626342773, - "learning_rate": 1.874409446566282e-05, - "loss": 1.3047, - "step": 681 - }, - { - "epoch": 0.5161778618732261, - "grad_norm": 2.5216503143310547, - "learning_rate": 1.8740196467876114e-05, - "loss": 1.2464, - "step": 682 - }, - { - "epoch": 0.5169347209082309, - "grad_norm": 2.737325668334961, - "learning_rate": 1.8736292836936643e-05, - "loss": 1.2666, - "step": 683 - }, - { - "epoch": 0.5176915799432356, - "grad_norm": 2.625519037246704, - "learning_rate": 1.8732383575360625e-05, - "loss": 1.2403, - "step": 684 - }, - { - "epoch": 0.5184484389782403, - "grad_norm": 2.784569263458252, - "learning_rate": 1.8728468685667914e-05, - "loss": 1.2627, - "step": 685 - }, - { - "epoch": 0.519205298013245, - "grad_norm": 2.7349774837493896, - "learning_rate": 1.8724548170381983e-05, - "loss": 1.2771, - "step": 686 - }, - { - "epoch": 0.5199621570482498, - "grad_norm": 2.681603193283081, - "learning_rate": 1.8720622032029936e-05, 
- "loss": 1.276, - "step": 687 - }, - { - "epoch": 0.5207190160832545, - "grad_norm": 2.767359972000122, - "learning_rate": 1.8716690273142504e-05, - "loss": 1.2279, - "step": 688 - }, - { - "epoch": 0.5214758751182592, - "grad_norm": 2.5928122997283936, - "learning_rate": 1.871275289625404e-05, - "loss": 1.2568, - "step": 689 - }, - { - "epoch": 0.522232734153264, - "grad_norm": 2.6970558166503906, - "learning_rate": 1.8708809903902517e-05, - "loss": 1.3101, - "step": 690 - }, - { - "epoch": 0.5229895931882687, - "grad_norm": 2.6737709045410156, - "learning_rate": 1.8704861298629524e-05, - "loss": 1.2575, - "step": 691 - }, - { - "epoch": 0.5237464522232734, - "grad_norm": 3.0363659858703613, - "learning_rate": 1.870090708298028e-05, - "loss": 1.3034, - "step": 692 - }, - { - "epoch": 0.5245033112582781, - "grad_norm": 2.817183017730713, - "learning_rate": 1.8696947259503603e-05, - "loss": 1.2962, - "step": 693 - }, - { - "epoch": 0.5252601702932829, - "grad_norm": 3.507577896118164, - "learning_rate": 1.8692981830751937e-05, - "loss": 1.2643, - "step": 694 - }, - { - "epoch": 0.5260170293282876, - "grad_norm": 2.9019994735717773, - "learning_rate": 1.868901079928134e-05, - "loss": 1.2968, - "step": 695 - }, - { - "epoch": 0.5267738883632923, - "grad_norm": 2.6820502281188965, - "learning_rate": 1.8685034167651477e-05, - "loss": 1.281, - "step": 696 - }, - { - "epoch": 0.5275307473982971, - "grad_norm": 2.5685501098632812, - "learning_rate": 1.8681051938425626e-05, - "loss": 1.2368, - "step": 697 - }, - { - "epoch": 0.5282876064333017, - "grad_norm": 2.943498134613037, - "learning_rate": 1.867706411417067e-05, - "loss": 1.2494, - "step": 698 - }, - { - "epoch": 0.5290444654683065, - "grad_norm": 2.9893808364868164, - "learning_rate": 1.8673070697457097e-05, - "loss": 1.3033, - "step": 699 - }, - { - "epoch": 0.5298013245033113, - "grad_norm": 3.192913293838501, - "learning_rate": 1.8669071690859002e-05, - "loss": 1.3122, - "step": 700 - }, - { - "epoch": 0.530558183538316, - "grad_norm": 2.6208715438842773, - "learning_rate": 1.866506709695409e-05, - "loss": 1.2335, - "step": 701 - }, - { - "epoch": 0.5313150425733207, - "grad_norm": 2.793226718902588, - "learning_rate": 1.8661056918323654e-05, - "loss": 1.2721, - "step": 702 - }, - { - "epoch": 0.5320719016083254, - "grad_norm": 2.809190034866333, - "learning_rate": 1.8657041157552597e-05, - "loss": 1.2318, - "step": 703 - }, - { - "epoch": 0.5328287606433302, - "grad_norm": 2.70646595954895, - "learning_rate": 1.865301981722942e-05, - "loss": 1.2471, - "step": 704 - }, - { - "epoch": 0.533585619678335, - "grad_norm": 2.691943407058716, - "learning_rate": 1.864899289994621e-05, - "loss": 1.2765, - "step": 705 - }, - { - "epoch": 0.5343424787133396, - "grad_norm": 2.6376893520355225, - "learning_rate": 1.864496040829867e-05, - "loss": 1.2932, - "step": 706 - }, - { - "epoch": 0.5350993377483444, - "grad_norm": 2.727936029434204, - "learning_rate": 1.8640922344886066e-05, - "loss": 1.2056, - "step": 707 - }, - { - "epoch": 0.5358561967833491, - "grad_norm": 2.599090337753296, - "learning_rate": 1.863687871231128e-05, - "loss": 1.2747, - "step": 708 - }, - { - "epoch": 0.5366130558183538, - "grad_norm": 2.9305431842803955, - "learning_rate": 1.863282951318078e-05, - "loss": 1.2593, - "step": 709 - }, - { - "epoch": 0.5373699148533586, - "grad_norm": 2.5242085456848145, - "learning_rate": 1.8628774750104615e-05, - "loss": 1.2669, - "step": 710 - }, - { - "epoch": 0.5381267738883633, - "grad_norm": 2.737729787826538, - "learning_rate": 
1.862471442569642e-05, - "loss": 1.2515, - "step": 711 - }, - { - "epoch": 0.538883632923368, - "grad_norm": 2.8515143394470215, - "learning_rate": 1.8620648542573423e-05, - "loss": 1.2483, - "step": 712 - }, - { - "epoch": 0.5396404919583727, - "grad_norm": 2.8016417026519775, - "learning_rate": 1.8616577103356425e-05, - "loss": 1.2389, - "step": 713 - }, - { - "epoch": 0.5403973509933775, - "grad_norm": 2.9451699256896973, - "learning_rate": 1.861250011066982e-05, - "loss": 1.2345, - "step": 714 - }, - { - "epoch": 0.5411542100283823, - "grad_norm": 2.771279811859131, - "learning_rate": 1.8608417567141572e-05, - "loss": 1.2621, - "step": 715 - }, - { - "epoch": 0.5419110690633869, - "grad_norm": 2.9805190563201904, - "learning_rate": 1.860432947540322e-05, - "loss": 1.2348, - "step": 716 - }, - { - "epoch": 0.5426679280983917, - "grad_norm": 2.803847312927246, - "learning_rate": 1.8600235838089896e-05, - "loss": 1.241, - "step": 717 - }, - { - "epoch": 0.5434247871333964, - "grad_norm": 2.4871954917907715, - "learning_rate": 1.859613665784029e-05, - "loss": 1.2883, - "step": 718 - }, - { - "epoch": 0.5441816461684011, - "grad_norm": 3.067754030227661, - "learning_rate": 1.8592031937296673e-05, - "loss": 1.2833, - "step": 719 - }, - { - "epoch": 0.5449385052034059, - "grad_norm": 2.8348135948181152, - "learning_rate": 1.8587921679104887e-05, - "loss": 1.3083, - "step": 720 - }, - { - "epoch": 0.5456953642384106, - "grad_norm": 2.538663387298584, - "learning_rate": 1.8583805885914345e-05, - "loss": 1.2288, - "step": 721 - }, - { - "epoch": 0.5464522232734154, - "grad_norm": 2.7975425720214844, - "learning_rate": 1.857968456037801e-05, - "loss": 1.3166, - "step": 722 - }, - { - "epoch": 0.54720908230842, - "grad_norm": 2.60284423828125, - "learning_rate": 1.857555770515244e-05, - "loss": 1.251, - "step": 723 - }, - { - "epoch": 0.5479659413434248, - "grad_norm": 3.0047545433044434, - "learning_rate": 1.857142532289774e-05, - "loss": 1.2372, - "step": 724 - }, - { - "epoch": 0.5487228003784295, - "grad_norm": 2.7439827919006348, - "learning_rate": 1.8567287416277576e-05, - "loss": 1.2686, - "step": 725 - }, - { - "epoch": 0.5494796594134342, - "grad_norm": 2.7966012954711914, - "learning_rate": 1.856314398795918e-05, - "loss": 1.2997, - "step": 726 - }, - { - "epoch": 0.550236518448439, - "grad_norm": 2.4072394371032715, - "learning_rate": 1.855899504061335e-05, - "loss": 1.2371, - "step": 727 - }, - { - "epoch": 0.5509933774834437, - "grad_norm": 2.6710758209228516, - "learning_rate": 1.8554840576914425e-05, - "loss": 1.3084, - "step": 728 - }, - { - "epoch": 0.5517502365184485, - "grad_norm": 2.4834091663360596, - "learning_rate": 1.8550680599540315e-05, - "loss": 1.2335, - "step": 729 - }, - { - "epoch": 0.5525070955534531, - "grad_norm": 3.0747454166412354, - "learning_rate": 1.8546515111172475e-05, - "loss": 1.2691, - "step": 730 - }, - { - "epoch": 0.5532639545884579, - "grad_norm": 2.3881189823150635, - "learning_rate": 1.8542344114495918e-05, - "loss": 1.2852, - "step": 731 - }, - { - "epoch": 0.5540208136234627, - "grad_norm": 2.559795618057251, - "learning_rate": 1.85381676121992e-05, - "loss": 1.2266, - "step": 732 - }, - { - "epoch": 0.5547776726584673, - "grad_norm": 2.5426385402679443, - "learning_rate": 1.8533985606974436e-05, - "loss": 1.2136, - "step": 733 - }, - { - "epoch": 0.5555345316934721, - "grad_norm": 2.7627816200256348, - "learning_rate": 1.8529798101517283e-05, - "loss": 1.227, - "step": 734 - }, - { - "epoch": 0.5562913907284768, - "grad_norm": 3.559936285018921, 
- "learning_rate": 1.8525605098526935e-05, - "loss": 1.2823, - "step": 735 - }, - { - "epoch": 0.5570482497634816, - "grad_norm": 2.6380114555358887, - "learning_rate": 1.8521406600706146e-05, - "loss": 1.2077, - "step": 736 - }, - { - "epoch": 0.5578051087984863, - "grad_norm": 2.3080461025238037, - "learning_rate": 1.8517202610761203e-05, - "loss": 1.2146, - "step": 737 - }, - { - "epoch": 0.558561967833491, - "grad_norm": 2.245431423187256, - "learning_rate": 1.851299313140193e-05, - "loss": 1.2073, - "step": 738 - }, - { - "epoch": 0.5593188268684958, - "grad_norm": 2.4832706451416016, - "learning_rate": 1.8508778165341697e-05, - "loss": 1.2167, - "step": 739 - }, - { - "epoch": 0.5600756859035004, - "grad_norm": 2.646280288696289, - "learning_rate": 1.85045577152974e-05, - "loss": 1.2379, - "step": 740 - }, - { - "epoch": 0.5608325449385052, - "grad_norm": 2.449310302734375, - "learning_rate": 1.8500331783989486e-05, - "loss": 1.2085, - "step": 741 - }, - { - "epoch": 0.56158940397351, - "grad_norm": 2.7046239376068115, - "learning_rate": 1.8496100374141924e-05, - "loss": 1.2255, - "step": 742 - }, - { - "epoch": 0.5623462630085146, - "grad_norm": 2.5250003337860107, - "learning_rate": 1.849186348848221e-05, - "loss": 1.2028, - "step": 743 - }, - { - "epoch": 0.5631031220435194, - "grad_norm": 2.423783779144287, - "learning_rate": 1.848762112974138e-05, - "loss": 1.2485, - "step": 744 - }, - { - "epoch": 0.5638599810785241, - "grad_norm": 2.3143739700317383, - "learning_rate": 1.8483373300653995e-05, - "loss": 1.2238, - "step": 745 - }, - { - "epoch": 0.5646168401135289, - "grad_norm": 2.433070421218872, - "learning_rate": 1.8479120003958136e-05, - "loss": 1.2496, - "step": 746 - }, - { - "epoch": 0.5653736991485336, - "grad_norm": 2.5320703983306885, - "learning_rate": 1.8474861242395424e-05, - "loss": 1.2477, - "step": 747 - }, - { - "epoch": 0.5661305581835383, - "grad_norm": 5.566840171813965, - "learning_rate": 1.8470597018710976e-05, - "loss": 1.2086, - "step": 748 - }, - { - "epoch": 0.5668874172185431, - "grad_norm": 2.5624606609344482, - "learning_rate": 1.8466327335653458e-05, - "loss": 1.2458, - "step": 749 - }, - { - "epoch": 0.5676442762535477, - "grad_norm": 3.781528949737549, - "learning_rate": 1.846205219597504e-05, - "loss": 1.259, - "step": 750 - }, - { - "epoch": 0.5684011352885525, - "grad_norm": 2.4453654289245605, - "learning_rate": 1.8457771602431406e-05, - "loss": 1.2511, - "step": 751 - }, - { - "epoch": 0.5691579943235573, - "grad_norm": 2.4234702587127686, - "learning_rate": 1.8453485557781768e-05, - "loss": 1.2339, - "step": 752 - }, - { - "epoch": 0.569914853358562, - "grad_norm": 2.637007236480713, - "learning_rate": 1.8449194064788845e-05, - "loss": 1.2274, - "step": 753 - }, - { - "epoch": 0.5706717123935667, - "grad_norm": 2.557408332824707, - "learning_rate": 1.8444897126218865e-05, - "loss": 1.2718, - "step": 754 - }, - { - "epoch": 0.5714285714285714, - "grad_norm": 2.3460357189178467, - "learning_rate": 1.8440594744841564e-05, - "loss": 1.2522, - "step": 755 - }, - { - "epoch": 0.5721854304635762, - "grad_norm": 2.9702370166778564, - "learning_rate": 1.84362869234302e-05, - "loss": 1.2365, - "step": 756 - }, - { - "epoch": 0.5729422894985808, - "grad_norm": 2.4645347595214844, - "learning_rate": 1.843197366476153e-05, - "loss": 1.2497, - "step": 757 - }, - { - "epoch": 0.5736991485335856, - "grad_norm": 2.525984764099121, - "learning_rate": 1.8427654971615804e-05, - "loss": 1.2472, - "step": 758 - }, - { - "epoch": 0.5744560075685904, - 
"grad_norm": 2.598914861679077, - "learning_rate": 1.8423330846776797e-05, - "loss": 1.2783, - "step": 759 - }, - { - "epoch": 0.575212866603595, - "grad_norm": 2.464893341064453, - "learning_rate": 1.841900129303177e-05, - "loss": 1.2331, - "step": 760 - }, - { - "epoch": 0.5759697256385998, - "grad_norm": 2.517779588699341, - "learning_rate": 1.8414666313171488e-05, - "loss": 1.2087, - "step": 761 - }, - { - "epoch": 0.5767265846736045, - "grad_norm": 2.3364832401275635, - "learning_rate": 1.8410325909990207e-05, - "loss": 1.251, - "step": 762 - }, - { - "epoch": 0.5774834437086093, - "grad_norm": 2.348635673522949, - "learning_rate": 1.8405980086285693e-05, - "loss": 1.2424, - "step": 763 - }, - { - "epoch": 0.578240302743614, - "grad_norm": 2.472801446914673, - "learning_rate": 1.8401628844859193e-05, - "loss": 1.1972, - "step": 764 - }, - { - "epoch": 0.5789971617786187, - "grad_norm": 2.528832197189331, - "learning_rate": 1.839727218851545e-05, - "loss": 1.2904, - "step": 765 - }, - { - "epoch": 0.5797540208136235, - "grad_norm": 2.833585262298584, - "learning_rate": 1.83929101200627e-05, - "loss": 1.2284, - "step": 766 - }, - { - "epoch": 0.5805108798486281, - "grad_norm": 2.886864185333252, - "learning_rate": 1.838854264231267e-05, - "loss": 1.2529, - "step": 767 - }, - { - "epoch": 0.5812677388836329, - "grad_norm": 2.6184258460998535, - "learning_rate": 1.8384169758080564e-05, - "loss": 1.2422, - "step": 768 - }, - { - "epoch": 0.5820245979186377, - "grad_norm": 2.59594988822937, - "learning_rate": 1.8379791470185077e-05, - "loss": 1.2349, - "step": 769 - }, - { - "epoch": 0.5827814569536424, - "grad_norm": 2.359560489654541, - "learning_rate": 1.837540778144839e-05, - "loss": 1.2146, - "step": 770 - }, - { - "epoch": 0.5835383159886471, - "grad_norm": 3.088444709777832, - "learning_rate": 1.8371018694696155e-05, - "loss": 1.2667, - "step": 771 - }, - { - "epoch": 0.5842951750236518, - "grad_norm": 2.766091823577881, - "learning_rate": 1.836662421275752e-05, - "loss": 1.218, - "step": 772 - }, - { - "epoch": 0.5850520340586566, - "grad_norm": 2.739274263381958, - "learning_rate": 1.8362224338465093e-05, - "loss": 1.2618, - "step": 773 - }, - { - "epoch": 0.5858088930936614, - "grad_norm": 4.742860794067383, - "learning_rate": 1.835781907465497e-05, - "loss": 1.2989, - "step": 774 - }, - { - "epoch": 0.586565752128666, - "grad_norm": 3.0373001098632812, - "learning_rate": 1.8353408424166712e-05, - "loss": 1.283, - "step": 775 - }, - { - "epoch": 0.5873226111636708, - "grad_norm": 2.5657973289489746, - "learning_rate": 1.8348992389843365e-05, - "loss": 1.1942, - "step": 776 - }, - { - "epoch": 0.5880794701986755, - "grad_norm": 2.7128591537475586, - "learning_rate": 1.834457097453143e-05, - "loss": 1.262, - "step": 777 - }, - { - "epoch": 0.5888363292336802, - "grad_norm": 2.4917023181915283, - "learning_rate": 1.834014418108089e-05, - "loss": 1.2194, - "step": 778 - }, - { - "epoch": 0.589593188268685, - "grad_norm": 2.7309277057647705, - "learning_rate": 1.8335712012345188e-05, - "loss": 1.231, - "step": 779 - }, - { - "epoch": 0.5903500473036897, - "grad_norm": 2.894216537475586, - "learning_rate": 1.8331274471181224e-05, - "loss": 1.234, - "step": 780 - }, - { - "epoch": 0.5911069063386944, - "grad_norm": 2.491863250732422, - "learning_rate": 1.8326831560449375e-05, - "loss": 1.2335, - "step": 781 - }, - { - "epoch": 0.5918637653736991, - "grad_norm": 2.636247396469116, - "learning_rate": 1.832238328301348e-05, - "loss": 1.2371, - "step": 782 - }, - { - "epoch": 
0.5926206244087039, - "grad_norm": 2.6232643127441406, - "learning_rate": 1.831792964174082e-05, - "loss": 1.2626, - "step": 783 - }, - { - "epoch": 0.5933774834437087, - "grad_norm": 2.667076826095581, - "learning_rate": 1.8313470639502148e-05, - "loss": 1.257, - "step": 784 - }, - { - "epoch": 0.5941343424787133, - "grad_norm": 2.936359405517578, - "learning_rate": 1.8309006279171675e-05, - "loss": 1.2509, - "step": 785 - }, - { - "epoch": 0.5948912015137181, - "grad_norm": 2.522406578063965, - "learning_rate": 1.8304536563627052e-05, - "loss": 1.2804, - "step": 786 - }, - { - "epoch": 0.5956480605487228, - "grad_norm": 2.542407512664795, - "learning_rate": 1.830006149574939e-05, - "loss": 1.2393, - "step": 787 - }, - { - "epoch": 0.5964049195837275, - "grad_norm": 2.5919876098632812, - "learning_rate": 1.8295581078423253e-05, - "loss": 1.2622, - "step": 788 - }, - { - "epoch": 0.5971617786187322, - "grad_norm": 2.7095932960510254, - "learning_rate": 1.8291095314536647e-05, - "loss": 1.2491, - "step": 789 - }, - { - "epoch": 0.597918637653737, - "grad_norm": 2.4110512733459473, - "learning_rate": 1.8286604206981028e-05, - "loss": 1.2622, - "step": 790 - }, - { - "epoch": 0.5986754966887418, - "grad_norm": 2.7041079998016357, - "learning_rate": 1.8282107758651295e-05, - "loss": 1.2563, - "step": 791 - }, - { - "epoch": 0.5994323557237464, - "grad_norm": 2.7525973320007324, - "learning_rate": 1.827760597244579e-05, - "loss": 1.2449, - "step": 792 - }, - { - "epoch": 0.6001892147587512, - "grad_norm": 2.612968921661377, - "learning_rate": 1.8273098851266297e-05, - "loss": 1.258, - "step": 793 - }, - { - "epoch": 0.6009460737937559, - "grad_norm": 2.6070921421051025, - "learning_rate": 1.826858639801804e-05, - "loss": 1.3045, - "step": 794 - }, - { - "epoch": 0.6017029328287606, - "grad_norm": 2.4890692234039307, - "learning_rate": 1.8264068615609668e-05, - "loss": 1.2253, - "step": 795 - }, - { - "epoch": 0.6024597918637654, - "grad_norm": 2.9760918617248535, - "learning_rate": 1.8259545506953285e-05, - "loss": 1.2673, - "step": 796 - }, - { - "epoch": 0.6032166508987701, - "grad_norm": 2.8577773571014404, - "learning_rate": 1.825501707496441e-05, - "loss": 1.264, - "step": 797 - }, - { - "epoch": 0.6039735099337749, - "grad_norm": 2.549546718597412, - "learning_rate": 1.825048332256201e-05, - "loss": 1.2228, - "step": 798 - }, - { - "epoch": 0.6047303689687795, - "grad_norm": 2.7687017917633057, - "learning_rate": 1.8245944252668462e-05, - "loss": 1.2522, - "step": 799 - }, - { - "epoch": 0.6054872280037843, - "grad_norm": 2.533287763595581, - "learning_rate": 1.824139986820959e-05, - "loss": 1.1939, - "step": 800 - }, - { - "epoch": 0.6062440870387891, - "grad_norm": 2.6402809619903564, - "learning_rate": 1.8236850172114633e-05, - "loss": 1.2417, - "step": 801 - }, - { - "epoch": 0.6070009460737937, - "grad_norm": 2.592946767807007, - "learning_rate": 1.8232295167316252e-05, - "loss": 1.2922, - "step": 802 - }, - { - "epoch": 0.6077578051087985, - "grad_norm": 2.6012048721313477, - "learning_rate": 1.8227734856750537e-05, - "loss": 1.2658, - "step": 803 - }, - { - "epoch": 0.6085146641438032, - "grad_norm": 2.737257242202759, - "learning_rate": 1.8223169243356995e-05, - "loss": 1.2955, - "step": 804 - }, - { - "epoch": 0.609271523178808, - "grad_norm": 2.8576440811157227, - "learning_rate": 1.8218598330078548e-05, - "loss": 1.261, - "step": 805 - }, - { - "epoch": 0.6100283822138127, - "grad_norm": 4.944385051727295, - "learning_rate": 1.8214022119861537e-05, - "loss": 1.2438, - "step": 
806 - }, - { - "epoch": 0.6107852412488174, - "grad_norm": 2.8472225666046143, - "learning_rate": 1.820944061565572e-05, - "loss": 1.2305, - "step": 807 - }, - { - "epoch": 0.6115421002838222, - "grad_norm": 2.8943638801574707, - "learning_rate": 1.8204853820414267e-05, - "loss": 1.2608, - "step": 808 - }, - { - "epoch": 0.6122989593188268, - "grad_norm": 2.523142099380493, - "learning_rate": 1.820026173709375e-05, - "loss": 1.2721, - "step": 809 - }, - { - "epoch": 0.6130558183538316, - "grad_norm": 2.8089590072631836, - "learning_rate": 1.8195664368654157e-05, - "loss": 1.222, - "step": 810 - }, - { - "epoch": 0.6138126773888364, - "grad_norm": 2.9274590015411377, - "learning_rate": 1.8191061718058885e-05, - "loss": 1.2534, - "step": 811 - }, - { - "epoch": 0.614569536423841, - "grad_norm": 2.6819167137145996, - "learning_rate": 1.818645378827473e-05, - "loss": 1.2566, - "step": 812 - }, - { - "epoch": 0.6153263954588458, - "grad_norm": 2.5687010288238525, - "learning_rate": 1.8181840582271897e-05, - "loss": 1.2323, - "step": 813 - }, - { - "epoch": 0.6160832544938505, - "grad_norm": 2.636622428894043, - "learning_rate": 1.8177222103023983e-05, - "loss": 1.2007, - "step": 814 - }, - { - "epoch": 0.6168401135288553, - "grad_norm": 2.5585618019104004, - "learning_rate": 1.8172598353507988e-05, - "loss": 1.2169, - "step": 815 - }, - { - "epoch": 0.61759697256386, - "grad_norm": 2.880889415740967, - "learning_rate": 1.8167969336704322e-05, - "loss": 1.2211, - "step": 816 - }, - { - "epoch": 0.6183538315988647, - "grad_norm": 2.575530767440796, - "learning_rate": 1.8163335055596764e-05, - "loss": 1.2165, - "step": 817 - }, - { - "epoch": 0.6191106906338695, - "grad_norm": 2.65857195854187, - "learning_rate": 1.815869551317251e-05, - "loss": 1.2527, - "step": 818 - }, - { - "epoch": 0.6198675496688741, - "grad_norm": 2.7308692932128906, - "learning_rate": 1.8154050712422135e-05, - "loss": 1.245, - "step": 819 - }, - { - "epoch": 0.6206244087038789, - "grad_norm": 2.4128143787384033, - "learning_rate": 1.8149400656339606e-05, - "loss": 1.2274, - "step": 820 - }, - { - "epoch": 0.6213812677388836, - "grad_norm": 2.678269386291504, - "learning_rate": 1.8144745347922282e-05, - "loss": 1.2348, - "step": 821 - }, - { - "epoch": 0.6221381267738884, - "grad_norm": 2.4970011711120605, - "learning_rate": 1.81400847901709e-05, - "loss": 1.2525, - "step": 822 - }, - { - "epoch": 0.6228949858088931, - "grad_norm": 3.0284082889556885, - "learning_rate": 1.813541898608959e-05, - "loss": 1.2283, - "step": 823 - }, - { - "epoch": 0.6236518448438978, - "grad_norm": 2.5325472354888916, - "learning_rate": 1.813074793868585e-05, - "loss": 1.2177, - "step": 824 - }, - { - "epoch": 0.6244087038789026, - "grad_norm": 2.8422694206237793, - "learning_rate": 1.8126071650970566e-05, - "loss": 1.1957, - "step": 825 - }, - { - "epoch": 0.6251655629139072, - "grad_norm": 2.7805769443511963, - "learning_rate": 1.8121390125958012e-05, - "loss": 1.2406, - "step": 826 - }, - { - "epoch": 0.625922421948912, - "grad_norm": 3.035707473754883, - "learning_rate": 1.811670336666582e-05, - "loss": 1.2217, - "step": 827 - }, - { - "epoch": 0.6266792809839168, - "grad_norm": 2.6617417335510254, - "learning_rate": 1.8112011376115004e-05, - "loss": 1.2489, - "step": 828 - }, - { - "epoch": 0.6274361400189215, - "grad_norm": 2.593369722366333, - "learning_rate": 1.8107314157329953e-05, - "loss": 1.2582, - "step": 829 - }, - { - "epoch": 0.6281929990539262, - "grad_norm": 2.33566951751709, - "learning_rate": 1.810261171333842e-05, - "loss": 
1.1726, - "step": 830 - }, - { - "epoch": 0.6289498580889309, - "grad_norm": 2.6399929523468018, - "learning_rate": 1.8097904047171525e-05, - "loss": 1.233, - "step": 831 - }, - { - "epoch": 0.6297067171239357, - "grad_norm": 2.833388328552246, - "learning_rate": 1.8093191161863765e-05, - "loss": 1.2465, - "step": 832 - }, - { - "epoch": 0.6304635761589404, - "grad_norm": 2.5618953704833984, - "learning_rate": 1.808847306045299e-05, - "loss": 1.2786, - "step": 833 - }, - { - "epoch": 0.6312204351939451, - "grad_norm": 2.449512004852295, - "learning_rate": 1.8083749745980417e-05, - "loss": 1.1866, - "step": 834 - }, - { - "epoch": 0.6319772942289499, - "grad_norm": 2.3261687755584717, - "learning_rate": 1.8079021221490623e-05, - "loss": 1.2293, - "step": 835 - }, - { - "epoch": 0.6327341532639545, - "grad_norm": 2.2670247554779053, - "learning_rate": 1.8074287490031544e-05, - "loss": 1.2307, - "step": 836 - }, - { - "epoch": 0.6334910122989593, - "grad_norm": 2.9090189933776855, - "learning_rate": 1.8069548554654465e-05, - "loss": 1.23, - "step": 837 - }, - { - "epoch": 0.6342478713339641, - "grad_norm": 2.2023513317108154, - "learning_rate": 1.8064804418414036e-05, - "loss": 1.2559, - "step": 838 - }, - { - "epoch": 0.6350047303689688, - "grad_norm": 2.3907856941223145, - "learning_rate": 1.8060055084368256e-05, - "loss": 1.1783, - "step": 839 - }, - { - "epoch": 0.6357615894039735, - "grad_norm": 2.7036445140838623, - "learning_rate": 1.805530055557847e-05, - "loss": 1.2268, - "step": 840 - }, - { - "epoch": 0.6365184484389782, - "grad_norm": 2.429286003112793, - "learning_rate": 1.805054083510938e-05, - "loss": 1.1904, - "step": 841 - }, - { - "epoch": 0.637275307473983, - "grad_norm": 2.644791603088379, - "learning_rate": 1.804577592602902e-05, - "loss": 1.1866, - "step": 842 - }, - { - "epoch": 0.6380321665089878, - "grad_norm": 2.7880802154541016, - "learning_rate": 1.804100583140879e-05, - "loss": 1.2817, - "step": 843 - }, - { - "epoch": 0.6387890255439924, - "grad_norm": 2.485358476638794, - "learning_rate": 1.8036230554323413e-05, - "loss": 1.281, - "step": 844 - }, - { - "epoch": 0.6395458845789972, - "grad_norm": 2.5849761962890625, - "learning_rate": 1.803145009785096e-05, - "loss": 1.248, - "step": 845 - }, - { - "epoch": 0.6403027436140019, - "grad_norm": 2.357409715652466, - "learning_rate": 1.8026664465072838e-05, - "loss": 1.2828, - "step": 846 - }, - { - "epoch": 0.6410596026490066, - "grad_norm": 2.4510414600372314, - "learning_rate": 1.80218736590738e-05, - "loss": 1.2275, - "step": 847 - }, - { - "epoch": 0.6418164616840114, - "grad_norm": 2.625035524368286, - "learning_rate": 1.8017077682941918e-05, - "loss": 1.2369, - "step": 848 - }, - { - "epoch": 0.6425733207190161, - "grad_norm": 2.4510104656219482, - "learning_rate": 1.8012276539768613e-05, - "loss": 1.2624, - "step": 849 - }, - { - "epoch": 0.6433301797540208, - "grad_norm": 2.6468582153320312, - "learning_rate": 1.800747023264862e-05, - "loss": 1.2964, - "step": 850 - }, - { - "epoch": 0.6440870387890255, - "grad_norm": 2.45991587638855, - "learning_rate": 1.800265876468002e-05, - "loss": 1.2359, - "step": 851 - }, - { - "epoch": 0.6448438978240303, - "grad_norm": 2.546734571456909, - "learning_rate": 1.799784213896421e-05, - "loss": 1.2124, - "step": 852 - }, - { - "epoch": 0.645600756859035, - "grad_norm": 2.265397787094116, - "learning_rate": 1.799302035860591e-05, - "loss": 1.1945, - "step": 853 - }, - { - "epoch": 0.6463576158940397, - "grad_norm": 2.4162395000457764, - "learning_rate": 
1.7988193426713165e-05, - "loss": 1.2115, - "step": 854 - }, - { - "epoch": 0.6471144749290445, - "grad_norm": 2.2301483154296875, - "learning_rate": 1.7983361346397347e-05, - "loss": 1.2699, - "step": 855 - }, - { - "epoch": 0.6478713339640492, - "grad_norm": 2.2673699855804443, - "learning_rate": 1.797852412077314e-05, - "loss": 1.2525, - "step": 856 - }, - { - "epoch": 0.6486281929990539, - "grad_norm": 2.5041098594665527, - "learning_rate": 1.7973681752958543e-05, - "loss": 1.231, - "step": 857 - }, - { - "epoch": 0.6493850520340586, - "grad_norm": 2.5438284873962402, - "learning_rate": 1.7968834246074875e-05, - "loss": 1.2316, - "step": 858 - }, - { - "epoch": 0.6501419110690634, - "grad_norm": 2.4436419010162354, - "learning_rate": 1.7963981603246762e-05, - "loss": 1.2461, - "step": 859 - }, - { - "epoch": 0.6508987701040682, - "grad_norm": 2.3260018825531006, - "learning_rate": 1.795912382760215e-05, - "loss": 1.2575, - "step": 860 - }, - { - "epoch": 0.6516556291390728, - "grad_norm": 2.527569532394409, - "learning_rate": 1.7954260922272278e-05, - "loss": 1.2552, - "step": 861 - }, - { - "epoch": 0.6524124881740776, - "grad_norm": 2.5068411827087402, - "learning_rate": 1.7949392890391706e-05, - "loss": 1.2439, - "step": 862 - }, - { - "epoch": 0.6531693472090823, - "grad_norm": 2.8131117820739746, - "learning_rate": 1.7944519735098295e-05, - "loss": 1.2669, - "step": 863 - }, - { - "epoch": 0.653926206244087, - "grad_norm": 2.368083953857422, - "learning_rate": 1.79396414595332e-05, - "loss": 1.273, - "step": 864 - }, - { - "epoch": 0.6546830652790918, - "grad_norm": 2.4757819175720215, - "learning_rate": 1.7934758066840893e-05, - "loss": 1.2652, - "step": 865 - }, - { - "epoch": 0.6554399243140965, - "grad_norm": 2.7727437019348145, - "learning_rate": 1.7929869560169123e-05, - "loss": 1.2661, - "step": 866 - }, - { - "epoch": 0.6561967833491013, - "grad_norm": 2.5417017936706543, - "learning_rate": 1.7924975942668954e-05, - "loss": 1.2624, - "step": 867 - }, - { - "epoch": 0.6569536423841059, - "grad_norm": 3.0404696464538574, - "learning_rate": 1.792007721749474e-05, - "loss": 1.2149, - "step": 868 - }, - { - "epoch": 0.6577105014191107, - "grad_norm": 2.528648853302002, - "learning_rate": 1.7915173387804115e-05, - "loss": 1.2536, - "step": 869 - }, - { - "epoch": 0.6584673604541155, - "grad_norm": 2.5994584560394287, - "learning_rate": 1.791026445675802e-05, - "loss": 1.2146, - "step": 870 - }, - { - "epoch": 0.6592242194891201, - "grad_norm": 2.523890495300293, - "learning_rate": 1.7905350427520672e-05, - "loss": 1.2599, - "step": 871 - }, - { - "epoch": 0.6599810785241249, - "grad_norm": 3.055417537689209, - "learning_rate": 1.7900431303259585e-05, - "loss": 1.2447, - "step": 872 - }, - { - "epoch": 0.6607379375591296, - "grad_norm": 2.5144965648651123, - "learning_rate": 1.789550708714555e-05, - "loss": 1.2022, - "step": 873 - }, - { - "epoch": 0.6614947965941343, - "grad_norm": 2.5344860553741455, - "learning_rate": 1.789057778235264e-05, - "loss": 1.2578, - "step": 874 - }, - { - "epoch": 0.6622516556291391, - "grad_norm": 2.7370986938476562, - "learning_rate": 1.7885643392058207e-05, - "loss": 1.1964, - "step": 875 - }, - { - "epoch": 0.6630085146641438, - "grad_norm": 2.831005573272705, - "learning_rate": 1.7880703919442885e-05, - "loss": 1.2218, - "step": 876 - }, - { - "epoch": 0.6637653736991486, - "grad_norm": 2.2660505771636963, - "learning_rate": 1.787575936769059e-05, - "loss": 1.232, - "step": 877 - }, - { - "epoch": 0.6645222327341532, - "grad_norm": 
2.5577943325042725, - "learning_rate": 1.78708097399885e-05, - "loss": 1.289, - "step": 878 - }, - { - "epoch": 0.665279091769158, - "grad_norm": 2.8066608905792236, - "learning_rate": 1.786585503952707e-05, - "loss": 1.2051, - "step": 879 - }, - { - "epoch": 0.6660359508041628, - "grad_norm": 2.683680295944214, - "learning_rate": 1.786089526950002e-05, - "loss": 1.2343, - "step": 880 - }, - { - "epoch": 0.6667928098391674, - "grad_norm": 2.571253538131714, - "learning_rate": 1.785593043310434e-05, - "loss": 1.2279, - "step": 881 - }, - { - "epoch": 0.6675496688741722, - "grad_norm": 2.2818214893341064, - "learning_rate": 1.78509605335403e-05, - "loss": 1.2346, - "step": 882 - }, - { - "epoch": 0.6683065279091769, - "grad_norm": 2.427520513534546, - "learning_rate": 1.7845985574011413e-05, - "loss": 1.251, - "step": 883 - }, - { - "epoch": 0.6690633869441817, - "grad_norm": 2.615901231765747, - "learning_rate": 1.784100555772446e-05, - "loss": 1.2697, - "step": 884 - }, - { - "epoch": 0.6698202459791863, - "grad_norm": 2.3778128623962402, - "learning_rate": 1.7836020487889495e-05, - "loss": 1.2291, - "step": 885 - }, - { - "epoch": 0.6705771050141911, - "grad_norm": 2.4669504165649414, - "learning_rate": 1.7831030367719802e-05, - "loss": 1.2365, - "step": 886 - }, - { - "epoch": 0.6713339640491959, - "grad_norm": 2.397721290588379, - "learning_rate": 1.782603520043195e-05, - "loss": 1.2718, - "step": 887 - }, - { - "epoch": 0.6720908230842005, - "grad_norm": 2.323598623275757, - "learning_rate": 1.782103498924574e-05, - "loss": 1.2706, - "step": 888 - }, - { - "epoch": 0.6728476821192053, - "grad_norm": 2.592615842819214, - "learning_rate": 1.7816029737384234e-05, - "loss": 1.2821, - "step": 889 - }, - { - "epoch": 0.67360454115421, - "grad_norm": 2.552388906478882, - "learning_rate": 1.7811019448073742e-05, - "loss": 1.2075, - "step": 890 - }, - { - "epoch": 0.6743614001892148, - "grad_norm": 2.659424304962158, - "learning_rate": 1.7806004124543818e-05, - "loss": 1.2365, - "step": 891 - }, - { - "epoch": 0.6751182592242195, - "grad_norm": 2.596625328063965, - "learning_rate": 1.7800983770027266e-05, - "loss": 1.2685, - "step": 892 - }, - { - "epoch": 0.6758751182592242, - "grad_norm": 2.485259771347046, - "learning_rate": 1.779595838776013e-05, - "loss": 1.2453, - "step": 893 - }, - { - "epoch": 0.676631977294229, - "grad_norm": 2.3858642578125, - "learning_rate": 1.7790927980981687e-05, - "loss": 1.1896, - "step": 894 - }, - { - "epoch": 0.6773888363292336, - "grad_norm": 2.53601336479187, - "learning_rate": 1.7785892552934468e-05, - "loss": 1.2533, - "step": 895 - }, - { - "epoch": 0.6781456953642384, - "grad_norm": 2.7505080699920654, - "learning_rate": 1.778085210686423e-05, - "loss": 1.2449, - "step": 896 - }, - { - "epoch": 0.6789025543992432, - "grad_norm": 2.4080655574798584, - "learning_rate": 1.7775806646019974e-05, - "loss": 1.1985, - "step": 897 - }, - { - "epoch": 0.6796594134342478, - "grad_norm": 2.742640972137451, - "learning_rate": 1.7770756173653923e-05, - "loss": 1.2434, - "step": 898 - }, - { - "epoch": 0.6804162724692526, - "grad_norm": 2.377990484237671, - "learning_rate": 1.776570069302153e-05, - "loss": 1.1726, - "step": 899 - }, - { - "epoch": 0.6811731315042573, - "grad_norm": 2.35687518119812, - "learning_rate": 1.7760640207381486e-05, - "loss": 1.2189, - "step": 900 - }, - { - "epoch": 0.6819299905392621, - "grad_norm": 2.576018810272217, - "learning_rate": 1.77555747199957e-05, - "loss": 1.2318, - "step": 901 - }, - { - "epoch": 0.6826868495742668, - 
"grad_norm": 2.3314318656921387, - "learning_rate": 1.7750504234129312e-05, - "loss": 1.1889, - "step": 902 - }, - { - "epoch": 0.6834437086092715, - "grad_norm": 2.3357717990875244, - "learning_rate": 1.7745428753050675e-05, - "loss": 1.2168, - "step": 903 - }, - { - "epoch": 0.6842005676442763, - "grad_norm": 2.2540555000305176, - "learning_rate": 1.774034828003137e-05, - "loss": 1.2017, - "step": 904 - }, - { - "epoch": 0.684957426679281, - "grad_norm": 2.325144052505493, - "learning_rate": 1.773526281834619e-05, - "loss": 1.2409, - "step": 905 - }, - { - "epoch": 0.6857142857142857, - "grad_norm": 2.731501340866089, - "learning_rate": 1.7730172371273147e-05, - "loss": 1.2765, - "step": 906 - }, - { - "epoch": 0.6864711447492905, - "grad_norm": 2.3535265922546387, - "learning_rate": 1.7725076942093468e-05, - "loss": 1.2353, - "step": 907 - }, - { - "epoch": 0.6872280037842952, - "grad_norm": 2.852663040161133, - "learning_rate": 1.7719976534091584e-05, - "loss": 1.2761, - "step": 908 - }, - { - "epoch": 0.6879848628192999, - "grad_norm": 2.5675928592681885, - "learning_rate": 1.7714871150555146e-05, - "loss": 1.1906, - "step": 909 - }, - { - "epoch": 0.6887417218543046, - "grad_norm": 2.1720049381256104, - "learning_rate": 1.7709760794775e-05, - "loss": 1.2057, - "step": 910 - }, - { - "epoch": 0.6894985808893094, - "grad_norm": 2.567373514175415, - "learning_rate": 1.7704645470045213e-05, - "loss": 1.2365, - "step": 911 - }, - { - "epoch": 0.6902554399243142, - "grad_norm": 2.38577938079834, - "learning_rate": 1.7699525179663034e-05, - "loss": 1.2047, - "step": 912 - }, - { - "epoch": 0.6910122989593188, - "grad_norm": 2.3595142364501953, - "learning_rate": 1.7694399926928932e-05, - "loss": 1.2329, - "step": 913 - }, - { - "epoch": 0.6917691579943236, - "grad_norm": 2.7524566650390625, - "learning_rate": 1.7689269715146562e-05, - "loss": 1.2461, - "step": 914 - }, - { - "epoch": 0.6925260170293283, - "grad_norm": 2.2120566368103027, - "learning_rate": 1.768413454762278e-05, - "loss": 1.2232, - "step": 915 - }, - { - "epoch": 0.693282876064333, - "grad_norm": 2.491506338119507, - "learning_rate": 1.767899442766764e-05, - "loss": 1.2185, - "step": 916 - }, - { - "epoch": 0.6940397350993377, - "grad_norm": 2.299386501312256, - "learning_rate": 1.7673849358594387e-05, - "loss": 1.2146, - "step": 917 - }, - { - "epoch": 0.6947965941343425, - "grad_norm": 2.367396831512451, - "learning_rate": 1.766869934371945e-05, - "loss": 1.2666, - "step": 918 - }, - { - "epoch": 0.6955534531693472, - "grad_norm": 2.379352331161499, - "learning_rate": 1.766354438636245e-05, - "loss": 1.2295, - "step": 919 - }, - { - "epoch": 0.6963103122043519, - "grad_norm": 2.91322660446167, - "learning_rate": 1.7658384489846197e-05, - "loss": 1.2211, - "step": 920 - }, - { - "epoch": 0.6970671712393567, - "grad_norm": 2.3727736473083496, - "learning_rate": 1.7653219657496675e-05, - "loss": 1.2478, - "step": 921 - }, - { - "epoch": 0.6978240302743614, - "grad_norm": 2.3029327392578125, - "learning_rate": 1.7648049892643064e-05, - "loss": 1.238, - "step": 922 - }, - { - "epoch": 0.6985808893093661, - "grad_norm": 2.3356475830078125, - "learning_rate": 1.7642875198617715e-05, - "loss": 1.1932, - "step": 923 - }, - { - "epoch": 0.6993377483443709, - "grad_norm": 2.5331709384918213, - "learning_rate": 1.7637695578756148e-05, - "loss": 1.1822, - "step": 924 - }, - { - "epoch": 0.7000946073793756, - "grad_norm": 2.714674949645996, - "learning_rate": 1.7632511036397078e-05, - "loss": 1.2454, - "step": 925 - }, - { - "epoch": 
0.7008514664143803, - "grad_norm": 2.497758388519287, - "learning_rate": 1.7627321574882373e-05, - "loss": 1.2552, - "step": 926 - }, - { - "epoch": 0.701608325449385, - "grad_norm": 2.6237785816192627, - "learning_rate": 1.7622127197557085e-05, - "loss": 1.2334, - "step": 927 - }, - { - "epoch": 0.7023651844843898, - "grad_norm": 2.4308512210845947, - "learning_rate": 1.7616927907769436e-05, - "loss": 1.2516, - "step": 928 - }, - { - "epoch": 0.7031220435193946, - "grad_norm": 2.1913723945617676, - "learning_rate": 1.7611723708870797e-05, - "loss": 1.267, - "step": 929 - }, - { - "epoch": 0.7038789025543992, - "grad_norm": 2.6569485664367676, - "learning_rate": 1.7606514604215723e-05, - "loss": 1.2301, - "step": 930 - }, - { - "epoch": 0.704635761589404, - "grad_norm": 2.4195547103881836, - "learning_rate": 1.7601300597161918e-05, - "loss": 1.2464, - "step": 931 - }, - { - "epoch": 0.7053926206244087, - "grad_norm": 2.580186128616333, - "learning_rate": 1.7596081691070262e-05, - "loss": 1.2432, - "step": 932 - }, - { - "epoch": 0.7061494796594134, - "grad_norm": 2.4679551124572754, - "learning_rate": 1.759085788930477e-05, - "loss": 1.23, - "step": 933 - }, - { - "epoch": 0.7069063386944182, - "grad_norm": 2.256150722503662, - "learning_rate": 1.7585629195232633e-05, - "loss": 1.2362, - "step": 934 - }, - { - "epoch": 0.7076631977294229, - "grad_norm": 2.7825927734375, - "learning_rate": 1.7580395612224184e-05, - "loss": 1.2542, - "step": 935 - }, - { - "epoch": 0.7084200567644277, - "grad_norm": 2.568265676498413, - "learning_rate": 1.757515714365291e-05, - "loss": 1.3004, - "step": 936 - }, - { - "epoch": 0.7091769157994323, - "grad_norm": 2.422884702682495, - "learning_rate": 1.7569913792895455e-05, - "loss": 1.2382, - "step": 937 - }, - { - "epoch": 0.7099337748344371, - "grad_norm": 2.4318430423736572, - "learning_rate": 1.7564665563331597e-05, - "loss": 1.2421, - "step": 938 - }, - { - "epoch": 0.7106906338694419, - "grad_norm": 2.4188950061798096, - "learning_rate": 1.755941245834426e-05, - "loss": 1.244, - "step": 939 - }, - { - "epoch": 0.7114474929044465, - "grad_norm": 2.37963604927063, - "learning_rate": 1.7554154481319523e-05, - "loss": 1.2137, - "step": 940 - }, - { - "epoch": 0.7122043519394513, - "grad_norm": 2.5548665523529053, - "learning_rate": 1.7548891635646595e-05, - "loss": 1.241, - "step": 941 - }, - { - "epoch": 0.712961210974456, - "grad_norm": 2.384345054626465, - "learning_rate": 1.7543623924717827e-05, - "loss": 1.2299, - "step": 942 - }, - { - "epoch": 0.7137180700094607, - "grad_norm": 2.459399461746216, - "learning_rate": 1.7538351351928705e-05, - "loss": 1.2242, - "step": 943 - }, - { - "epoch": 0.7144749290444655, - "grad_norm": 2.2715373039245605, - "learning_rate": 1.7533073920677847e-05, - "loss": 1.2264, - "step": 944 - }, - { - "epoch": 0.7152317880794702, - "grad_norm": 2.423783302307129, - "learning_rate": 1.752779163436701e-05, - "loss": 1.2061, - "step": 945 - }, - { - "epoch": 0.715988647114475, - "grad_norm": 2.368046283721924, - "learning_rate": 1.7522504496401068e-05, - "loss": 1.2568, - "step": 946 - }, - { - "epoch": 0.7167455061494796, - "grad_norm": 2.7439255714416504, - "learning_rate": 1.7517212510188034e-05, - "loss": 1.2123, - "step": 947 - }, - { - "epoch": 0.7175023651844844, - "grad_norm": 2.3615167140960693, - "learning_rate": 1.751191567913904e-05, - "loss": 1.2188, - "step": 948 - }, - { - "epoch": 0.7182592242194891, - "grad_norm": 2.394190549850464, - "learning_rate": 1.7506614006668346e-05, - "loss": 1.21, - "step": 949 - }, 
- { - "epoch": 0.7190160832544938, - "grad_norm": 2.2254350185394287, - "learning_rate": 1.7501307496193324e-05, - "loss": 1.2306, - "step": 950 - }, - { - "epoch": 0.7197729422894986, - "grad_norm": 2.734381675720215, - "learning_rate": 1.749599615113447e-05, - "loss": 1.2367, - "step": 951 - }, - { - "epoch": 0.7205298013245033, - "grad_norm": 2.4236867427825928, - "learning_rate": 1.7490679974915404e-05, - "loss": 1.2013, - "step": 952 - }, - { - "epoch": 0.7212866603595081, - "grad_norm": 2.4105286598205566, - "learning_rate": 1.748535897096284e-05, - "loss": 1.1849, - "step": 953 - }, - { - "epoch": 0.7220435193945127, - "grad_norm": 3.396277666091919, - "learning_rate": 1.7480033142706626e-05, - "loss": 1.2018, - "step": 954 - }, - { - "epoch": 0.7228003784295175, - "grad_norm": 2.49308180809021, - "learning_rate": 1.7474702493579704e-05, - "loss": 1.2533, - "step": 955 - }, - { - "epoch": 0.7235572374645223, - "grad_norm": 2.2357521057128906, - "learning_rate": 1.7469367027018134e-05, - "loss": 1.253, - "step": 956 - }, - { - "epoch": 0.7243140964995269, - "grad_norm": 2.3083794116973877, - "learning_rate": 1.746402674646107e-05, - "loss": 1.2062, - "step": 957 - }, - { - "epoch": 0.7250709555345317, - "grad_norm": 2.087985038757324, - "learning_rate": 1.745868165535078e-05, - "loss": 1.2146, - "step": 958 - }, - { - "epoch": 0.7258278145695364, - "grad_norm": 2.1703999042510986, - "learning_rate": 1.7453331757132627e-05, - "loss": 1.2593, - "step": 959 - }, - { - "epoch": 0.7265846736045412, - "grad_norm": 2.644440174102783, - "learning_rate": 1.7447977055255076e-05, - "loss": 1.2744, - "step": 960 - }, - { - "epoch": 0.7273415326395459, - "grad_norm": 2.2902777194976807, - "learning_rate": 1.744261755316968e-05, - "loss": 1.2336, - "step": 961 - }, - { - "epoch": 0.7280983916745506, - "grad_norm": 2.1898083686828613, - "learning_rate": 1.7437253254331103e-05, - "loss": 1.1872, - "step": 962 - }, - { - "epoch": 0.7288552507095554, - "grad_norm": 2.192096710205078, - "learning_rate": 1.7431884162197076e-05, - "loss": 1.1904, - "step": 963 - }, - { - "epoch": 0.72961210974456, - "grad_norm": 2.344484806060791, - "learning_rate": 1.7426510280228447e-05, - "loss": 1.2086, - "step": 964 - }, - { - "epoch": 0.7303689687795648, - "grad_norm": 2.4366836547851562, - "learning_rate": 1.742113161188913e-05, - "loss": 1.2367, - "step": 965 - }, - { - "epoch": 0.7311258278145696, - "grad_norm": 2.3846473693847656, - "learning_rate": 1.7415748160646136e-05, - "loss": 1.2182, - "step": 966 - }, - { - "epoch": 0.7318826868495742, - "grad_norm": 2.432124614715576, - "learning_rate": 1.7410359929969555e-05, - "loss": 1.2345, - "step": 967 - }, - { - "epoch": 0.732639545884579, - "grad_norm": 2.427494525909424, - "learning_rate": 1.7404966923332558e-05, - "loss": 1.2284, - "step": 968 - }, - { - "epoch": 0.7333964049195837, - "grad_norm": 2.3191261291503906, - "learning_rate": 1.73995691442114e-05, - "loss": 1.2092, - "step": 969 - }, - { - "epoch": 0.7341532639545885, - "grad_norm": 2.3739922046661377, - "learning_rate": 1.7394166596085393e-05, - "loss": 1.2276, - "step": 970 - }, - { - "epoch": 0.7349101229895932, - "grad_norm": 2.29589581489563, - "learning_rate": 1.7388759282436953e-05, - "loss": 1.2422, - "step": 971 - }, - { - "epoch": 0.7356669820245979, - "grad_norm": 2.3834304809570312, - "learning_rate": 1.7383347206751542e-05, - "loss": 1.252, - "step": 972 - }, - { - "epoch": 0.7364238410596027, - "grad_norm": 2.2572319507598877, - "learning_rate": 1.7377930372517705e-05, - "loss": 
1.2296, - "step": 973 - }, - { - "epoch": 0.7371807000946073, - "grad_norm": 2.6052353382110596, - "learning_rate": 1.7372508783227052e-05, - "loss": 1.2131, - "step": 974 - }, - { - "epoch": 0.7379375591296121, - "grad_norm": 2.4882845878601074, - "learning_rate": 1.7367082442374255e-05, - "loss": 1.2259, - "step": 975 - }, - { - "epoch": 0.7386944181646169, - "grad_norm": 2.301111936569214, - "learning_rate": 1.7361651353457053e-05, - "loss": 1.191, - "step": 976 - }, - { - "epoch": 0.7394512771996216, - "grad_norm": 2.496601104736328, - "learning_rate": 1.7356215519976236e-05, - "loss": 1.1749, - "step": 977 - }, - { - "epoch": 0.7402081362346263, - "grad_norm": 2.4782116413116455, - "learning_rate": 1.7350774945435667e-05, - "loss": 1.2282, - "step": 978 - }, - { - "epoch": 0.740964995269631, - "grad_norm": 2.3096814155578613, - "learning_rate": 1.7345329633342253e-05, - "loss": 1.1811, - "step": 979 - }, - { - "epoch": 0.7417218543046358, - "grad_norm": 2.5999755859375, - "learning_rate": 1.7339879587205966e-05, - "loss": 1.2081, - "step": 980 - }, - { - "epoch": 0.7424787133396404, - "grad_norm": 2.3727262020111084, - "learning_rate": 1.733442481053981e-05, - "loss": 1.2392, - "step": 981 - }, - { - "epoch": 0.7432355723746452, - "grad_norm": 2.621267318725586, - "learning_rate": 1.7328965306859864e-05, - "loss": 1.1715, - "step": 982 - }, - { - "epoch": 0.74399243140965, - "grad_norm": 2.786910057067871, - "learning_rate": 1.732350107968523e-05, - "loss": 1.2307, - "step": 983 - }, - { - "epoch": 0.7447492904446547, - "grad_norm": 2.5757007598876953, - "learning_rate": 1.7318032132538078e-05, - "loss": 1.204, - "step": 984 - }, - { - "epoch": 0.7455061494796594, - "grad_norm": 2.4591543674468994, - "learning_rate": 1.7312558468943595e-05, - "loss": 1.1665, - "step": 985 - }, - { - "epoch": 0.7462630085146641, - "grad_norm": 2.4593307971954346, - "learning_rate": 1.730708009243003e-05, - "loss": 1.2571, - "step": 986 - }, - { - "epoch": 0.7470198675496689, - "grad_norm": 2.507080554962158, - "learning_rate": 1.7301597006528654e-05, - "loss": 1.2222, - "step": 987 - }, - { - "epoch": 0.7477767265846736, - "grad_norm": 2.445662498474121, - "learning_rate": 1.7296109214773782e-05, - "loss": 1.2066, - "step": 988 - }, - { - "epoch": 0.7485335856196783, - "grad_norm": 2.341787099838257, - "learning_rate": 1.7290616720702768e-05, - "loss": 1.2395, - "step": 989 - }, - { - "epoch": 0.7492904446546831, - "grad_norm": 2.569960832595825, - "learning_rate": 1.728511952785598e-05, - "loss": 1.241, - "step": 990 - }, - { - "epoch": 0.7500473036896877, - "grad_norm": 2.5241215229034424, - "learning_rate": 1.7279617639776836e-05, - "loss": 1.2231, - "step": 991 - }, - { - "epoch": 0.7508041627246925, - "grad_norm": 2.4361581802368164, - "learning_rate": 1.727411106001176e-05, - "loss": 1.2381, - "step": 992 - }, - { - "epoch": 0.7515610217596973, - "grad_norm": 2.3338370323181152, - "learning_rate": 1.7268599792110213e-05, - "loss": 1.2526, - "step": 993 - }, - { - "epoch": 0.752317880794702, - "grad_norm": 2.398029327392578, - "learning_rate": 1.726308383962467e-05, - "loss": 1.2405, - "step": 994 - }, - { - "epoch": 0.7530747398297067, - "grad_norm": 2.437852382659912, - "learning_rate": 1.7257563206110636e-05, - "loss": 1.2553, - "step": 995 - }, - { - "epoch": 0.7538315988647114, - "grad_norm": 2.763335704803467, - "learning_rate": 1.7252037895126622e-05, - "loss": 1.2342, - "step": 996 - }, - { - "epoch": 0.7545884578997162, - "grad_norm": 2.5191261768341064, - "learning_rate": 
1.7246507910234162e-05, - "loss": 1.2188, - "step": 997 - }, - { - "epoch": 0.755345316934721, - "grad_norm": 2.5067646503448486, - "learning_rate": 1.72409732549978e-05, - "loss": 1.1707, - "step": 998 - }, - { - "epoch": 0.7561021759697256, - "grad_norm": 2.400637626647949, - "learning_rate": 1.7235433932985092e-05, - "loss": 1.1599, - "step": 999 - }, - { - "epoch": 0.7568590350047304, - "grad_norm": 2.410027027130127, - "learning_rate": 1.7229889947766597e-05, - "loss": 1.2442, - "step": 1000 - }, - { - "epoch": 0.7576158940397351, - "grad_norm": 2.3706107139587402, - "learning_rate": 1.7224341302915885e-05, - "loss": 1.2264, - "step": 1001 - }, - { - "epoch": 0.7583727530747398, - "grad_norm": 2.4948031902313232, - "learning_rate": 1.7218788002009527e-05, - "loss": 1.2505, - "step": 1002 - }, - { - "epoch": 0.7591296121097446, - "grad_norm": 2.4337100982666016, - "learning_rate": 1.7213230048627093e-05, - "loss": 1.2416, - "step": 1003 - }, - { - "epoch": 0.7598864711447493, - "grad_norm": 2.2913546562194824, - "learning_rate": 1.7207667446351165e-05, - "loss": 1.22, - "step": 1004 - }, - { - "epoch": 0.760643330179754, - "grad_norm": 2.4365074634552, - "learning_rate": 1.72021001987673e-05, - "loss": 1.2398, - "step": 1005 - }, - { - "epoch": 0.7614001892147587, - "grad_norm": 2.4662392139434814, - "learning_rate": 1.7196528309464067e-05, - "loss": 1.2372, - "step": 1006 - }, - { - "epoch": 0.7621570482497635, - "grad_norm": 2.5085933208465576, - "learning_rate": 1.719095178203302e-05, - "loss": 1.2408, - "step": 1007 - }, - { - "epoch": 0.7629139072847683, - "grad_norm": 2.447695016860962, - "learning_rate": 1.7185370620068705e-05, - "loss": 1.2062, - "step": 1008 - }, - { - "epoch": 0.7636707663197729, - "grad_norm": 2.8076727390289307, - "learning_rate": 1.717978482716865e-05, - "loss": 1.2051, - "step": 1009 - }, - { - "epoch": 0.7644276253547777, - "grad_norm": 2.6247246265411377, - "learning_rate": 1.7174194406933377e-05, - "loss": 1.1861, - "step": 1010 - }, - { - "epoch": 0.7651844843897824, - "grad_norm": 2.6273937225341797, - "learning_rate": 1.7168599362966382e-05, - "loss": 1.1919, - "step": 1011 - }, - { - "epoch": 0.7659413434247871, - "grad_norm": 2.363234281539917, - "learning_rate": 1.7162999698874144e-05, - "loss": 1.203, - "step": 1012 - }, - { - "epoch": 0.7666982024597918, - "grad_norm": 2.4418020248413086, - "learning_rate": 1.7157395418266125e-05, - "loss": 1.2146, - "step": 1013 - }, - { - "epoch": 0.7674550614947966, - "grad_norm": 2.4737863540649414, - "learning_rate": 1.7151786524754755e-05, - "loss": 1.2149, - "step": 1014 - }, - { - "epoch": 0.7682119205298014, - "grad_norm": 2.2613844871520996, - "learning_rate": 1.7146173021955444e-05, - "loss": 1.2276, - "step": 1015 - }, - { - "epoch": 0.768968779564806, - "grad_norm": 2.626579523086548, - "learning_rate": 1.714055491348657e-05, - "loss": 1.2384, - "step": 1016 - }, - { - "epoch": 0.7697256385998108, - "grad_norm": 2.406792163848877, - "learning_rate": 1.7134932202969482e-05, - "loss": 1.2285, - "step": 1017 - }, - { - "epoch": 0.7704824976348155, - "grad_norm": 2.456866979598999, - "learning_rate": 1.7129304894028483e-05, - "loss": 1.1853, - "step": 1018 - }, - { - "epoch": 0.7712393566698202, - "grad_norm": 2.5044846534729004, - "learning_rate": 1.7123672990290864e-05, - "loss": 1.212, - "step": 1019 - }, - { - "epoch": 0.771996215704825, - "grad_norm": 2.1986587047576904, - "learning_rate": 1.7118036495386856e-05, - "loss": 1.2106, - "step": 1020 - }, - { - "epoch": 0.7727530747398297, - 
"grad_norm": 2.4531362056732178, - "learning_rate": 1.7112395412949662e-05, - "loss": 1.2466, - "step": 1021 - }, - { - "epoch": 0.7735099337748345, - "grad_norm": 2.2251899242401123, - "learning_rate": 1.7106749746615437e-05, - "loss": 1.1857, - "step": 1022 - }, - { - "epoch": 0.7742667928098391, - "grad_norm": 2.2850799560546875, - "learning_rate": 1.7101099500023287e-05, - "loss": 1.2499, - "step": 1023 - }, - { - "epoch": 0.7750236518448439, - "grad_norm": 2.3555169105529785, - "learning_rate": 1.709544467681528e-05, - "loss": 1.2139, - "step": 1024 - }, - { - "epoch": 0.7757805108798487, - "grad_norm": 2.54640531539917, - "learning_rate": 1.7089785280636428e-05, - "loss": 1.2121, - "step": 1025 - }, - { - "epoch": 0.7765373699148533, - "grad_norm": 2.3403546810150146, - "learning_rate": 1.708412131513469e-05, - "loss": 1.2294, - "step": 1026 - }, - { - "epoch": 0.7772942289498581, - "grad_norm": 2.450343370437622, - "learning_rate": 1.707845278396097e-05, - "loss": 1.2224, - "step": 1027 - }, - { - "epoch": 0.7780510879848628, - "grad_norm": 2.4089951515197754, - "learning_rate": 1.707277969076912e-05, - "loss": 1.2295, - "step": 1028 - }, - { - "epoch": 0.7788079470198676, - "grad_norm": 2.244898796081543, - "learning_rate": 1.7067102039215928e-05, - "loss": 1.2656, - "step": 1029 - }, - { - "epoch": 0.7795648060548723, - "grad_norm": 2.2754669189453125, - "learning_rate": 1.7061419832961122e-05, - "loss": 1.2106, - "step": 1030 - }, - { - "epoch": 0.780321665089877, - "grad_norm": 2.3827311992645264, - "learning_rate": 1.7055733075667368e-05, - "loss": 1.1916, - "step": 1031 - }, - { - "epoch": 0.7810785241248818, - "grad_norm": 3.2731504440307617, - "learning_rate": 1.7050041771000258e-05, - "loss": 1.2265, - "step": 1032 - }, - { - "epoch": 0.7818353831598864, - "grad_norm": 2.48207950592041, - "learning_rate": 1.7044345922628326e-05, - "loss": 1.2305, - "step": 1033 - }, - { - "epoch": 0.7825922421948912, - "grad_norm": 2.3561174869537354, - "learning_rate": 1.703864553422302e-05, - "loss": 1.2191, - "step": 1034 - }, - { - "epoch": 0.783349101229896, - "grad_norm": 2.4696364402770996, - "learning_rate": 1.703294060945873e-05, - "loss": 1.2354, - "step": 1035 - }, - { - "epoch": 0.7841059602649006, - "grad_norm": 2.214374542236328, - "learning_rate": 1.7027231152012765e-05, - "loss": 1.1459, - "step": 1036 - }, - { - "epoch": 0.7848628192999054, - "grad_norm": 2.8190994262695312, - "learning_rate": 1.7021517165565352e-05, - "loss": 1.2289, - "step": 1037 - }, - { - "epoch": 0.7856196783349101, - "grad_norm": 2.3381307125091553, - "learning_rate": 1.701579865379964e-05, - "loss": 1.2142, - "step": 1038 - }, - { - "epoch": 0.7863765373699149, - "grad_norm": 2.4270827770233154, - "learning_rate": 1.7010075620401693e-05, - "loss": 1.175, - "step": 1039 - }, - { - "epoch": 0.7871333964049196, - "grad_norm": 2.5499768257141113, - "learning_rate": 1.7004348069060487e-05, - "loss": 1.1907, - "step": 1040 - }, - { - "epoch": 0.7878902554399243, - "grad_norm": 2.8665435314178467, - "learning_rate": 1.6998616003467923e-05, - "loss": 1.232, - "step": 1041 - }, - { - "epoch": 0.7886471144749291, - "grad_norm": 2.443026065826416, - "learning_rate": 1.6992879427318798e-05, - "loss": 1.206, - "step": 1042 - }, - { - "epoch": 0.7894039735099337, - "grad_norm": 2.408712148666382, - "learning_rate": 1.6987138344310822e-05, - "loss": 1.1984, - "step": 1043 - }, - { - "epoch": 0.7901608325449385, - "grad_norm": 2.5489931106567383, - "learning_rate": 1.6981392758144616e-05, - "loss": 1.27, - "step": 
1044 - }, - { - "epoch": 0.7909176915799432, - "grad_norm": 2.2722368240356445, - "learning_rate": 1.6975642672523684e-05, - "loss": 1.2572, - "step": 1045 - }, - { - "epoch": 0.791674550614948, - "grad_norm": 2.921919822692871, - "learning_rate": 1.6969888091154452e-05, - "loss": 1.2433, - "step": 1046 - }, - { - "epoch": 0.7924314096499527, - "grad_norm": 2.41582989692688, - "learning_rate": 1.6964129017746236e-05, - "loss": 1.2537, - "step": 1047 - }, - { - "epoch": 0.7931882686849574, - "grad_norm": 2.557302474975586, - "learning_rate": 1.695836545601125e-05, - "loss": 1.2248, - "step": 1048 - }, - { - "epoch": 0.7939451277199622, - "grad_norm": 2.4108498096466064, - "learning_rate": 1.6952597409664587e-05, - "loss": 1.2337, - "step": 1049 - }, - { - "epoch": 0.7947019867549668, - "grad_norm": 2.3067305088043213, - "learning_rate": 1.694682488242425e-05, - "loss": 1.1974, - "step": 1050 - }, - { - "epoch": 0.7954588457899716, - "grad_norm": 2.3873379230499268, - "learning_rate": 1.6941047878011122e-05, - "loss": 1.204, - "step": 1051 - }, - { - "epoch": 0.7962157048249764, - "grad_norm": 2.3166935443878174, - "learning_rate": 1.6935266400148963e-05, - "loss": 1.1585, - "step": 1052 - }, - { - "epoch": 0.796972563859981, - "grad_norm": 2.312579870223999, - "learning_rate": 1.6929480452564438e-05, - "loss": 1.2178, - "step": 1053 - }, - { - "epoch": 0.7977294228949858, - "grad_norm": 2.3897957801818848, - "learning_rate": 1.6923690038987075e-05, - "loss": 1.1867, - "step": 1054 - }, - { - "epoch": 0.7984862819299905, - "grad_norm": 2.5109200477600098, - "learning_rate": 1.6917895163149282e-05, - "loss": 1.2219, - "step": 1055 - }, - { - "epoch": 0.7992431409649953, - "grad_norm": 2.4277844429016113, - "learning_rate": 1.6912095828786353e-05, - "loss": 1.2224, - "step": 1056 - }, - { - "epoch": 0.8, - "grad_norm": 2.285210609436035, - "learning_rate": 1.6906292039636452e-05, - "loss": 1.233, - "step": 1057 - }, - { - "epoch": 0.8007568590350047, - "grad_norm": 2.475517511367798, - "learning_rate": 1.690048379944061e-05, - "loss": 1.2606, - "step": 1058 - }, - { - "epoch": 0.8015137180700095, - "grad_norm": 2.2740111351013184, - "learning_rate": 1.6894671111942733e-05, - "loss": 1.1516, - "step": 1059 - }, - { - "epoch": 0.8022705771050141, - "grad_norm": 2.777266263961792, - "learning_rate": 1.6888853980889583e-05, - "loss": 1.2257, - "step": 1060 - }, - { - "epoch": 0.8030274361400189, - "grad_norm": 2.4774162769317627, - "learning_rate": 1.6883032410030796e-05, - "loss": 1.2161, - "step": 1061 - }, - { - "epoch": 0.8037842951750237, - "grad_norm": 2.4283878803253174, - "learning_rate": 1.6877206403118875e-05, - "loss": 1.2258, - "step": 1062 - }, - { - "epoch": 0.8045411542100284, - "grad_norm": 2.2770519256591797, - "learning_rate": 1.687137596390917e-05, - "loss": 1.2209, - "step": 1063 - }, - { - "epoch": 0.8052980132450331, - "grad_norm": 2.4034667015075684, - "learning_rate": 1.6865541096159895e-05, - "loss": 1.1773, - "step": 1064 - }, - { - "epoch": 0.8060548722800378, - "grad_norm": 2.2633402347564697, - "learning_rate": 1.6859701803632117e-05, - "loss": 1.2552, - "step": 1065 - }, - { - "epoch": 0.8068117313150426, - "grad_norm": 2.3959109783172607, - "learning_rate": 1.6853858090089753e-05, - "loss": 1.27, - "step": 1066 - }, - { - "epoch": 0.8075685903500474, - "grad_norm": 2.4210898876190186, - "learning_rate": 1.6848009959299575e-05, - "loss": 1.2173, - "step": 1067 - }, - { - "epoch": 0.808325449385052, - "grad_norm": 2.3308327198028564, - "learning_rate": 
1.6842157415031194e-05, - "loss": 1.2738, - "step": 1068 - }, - { - "epoch": 0.8090823084200568, - "grad_norm": 3.167160987854004, - "learning_rate": 1.683630046105707e-05, - "loss": 1.2349, - "step": 1069 - }, - { - "epoch": 0.8098391674550615, - "grad_norm": 2.1552276611328125, - "learning_rate": 1.6830439101152513e-05, - "loss": 1.2436, - "step": 1070 - }, - { - "epoch": 0.8105960264900662, - "grad_norm": 2.5437731742858887, - "learning_rate": 1.682457333909566e-05, - "loss": 1.2039, - "step": 1071 - }, - { - "epoch": 0.811352885525071, - "grad_norm": 2.1334102153778076, - "learning_rate": 1.6818703178667496e-05, - "loss": 1.2173, - "step": 1072 - }, - { - "epoch": 0.8121097445600757, - "grad_norm": 2.5743660926818848, - "learning_rate": 1.6812828623651832e-05, - "loss": 1.2132, - "step": 1073 - }, - { - "epoch": 0.8128666035950805, - "grad_norm": 2.4903461933135986, - "learning_rate": 1.6806949677835328e-05, - "loss": 1.2428, - "step": 1074 - }, - { - "epoch": 0.8136234626300851, - "grad_norm": 2.2703421115875244, - "learning_rate": 1.6801066345007447e-05, - "loss": 1.1828, - "step": 1075 - }, - { - "epoch": 0.8143803216650899, - "grad_norm": 2.615246295928955, - "learning_rate": 1.6795178628960508e-05, - "loss": 1.2361, - "step": 1076 - }, - { - "epoch": 0.8151371807000946, - "grad_norm": 2.6063549518585205, - "learning_rate": 1.6789286533489635e-05, - "loss": 1.2586, - "step": 1077 - }, - { - "epoch": 0.8158940397350993, - "grad_norm": 2.725470542907715, - "learning_rate": 1.6783390062392788e-05, - "loss": 1.2166, - "step": 1078 - }, - { - "epoch": 0.8166508987701041, - "grad_norm": 2.576597213745117, - "learning_rate": 1.6777489219470743e-05, - "loss": 1.231, - "step": 1079 - }, - { - "epoch": 0.8174077578051088, - "grad_norm": 2.37703537940979, - "learning_rate": 1.677158400852708e-05, - "loss": 1.2663, - "step": 1080 - }, - { - "epoch": 0.8181646168401135, - "grad_norm": 2.3021481037139893, - "learning_rate": 1.6765674433368232e-05, - "loss": 1.2091, - "step": 1081 - }, - { - "epoch": 0.8189214758751182, - "grad_norm": 2.4437525272369385, - "learning_rate": 1.67597604978034e-05, - "loss": 1.2123, - "step": 1082 - }, - { - "epoch": 0.819678334910123, - "grad_norm": 2.470407724380493, - "learning_rate": 1.6753842205644628e-05, - "loss": 1.1948, - "step": 1083 - }, - { - "epoch": 0.8204351939451278, - "grad_norm": 2.5628767013549805, - "learning_rate": 1.6747919560706752e-05, - "loss": 1.2347, - "step": 1084 - }, - { - "epoch": 0.8211920529801324, - "grad_norm": 2.5520646572113037, - "learning_rate": 1.6741992566807416e-05, - "loss": 1.2319, - "step": 1085 - }, - { - "epoch": 0.8219489120151372, - "grad_norm": 2.4275975227355957, - "learning_rate": 1.673606122776708e-05, - "loss": 1.1666, - "step": 1086 - }, - { - "epoch": 0.8227057710501419, - "grad_norm": 2.803802728652954, - "learning_rate": 1.6730125547408984e-05, - "loss": 1.1861, - "step": 1087 - }, - { - "epoch": 0.8234626300851466, - "grad_norm": 3.4586920738220215, - "learning_rate": 1.6724185529559185e-05, - "loss": 1.2278, - "step": 1088 - }, - { - "epoch": 0.8242194891201514, - "grad_norm": 2.3933305740356445, - "learning_rate": 1.6718241178046526e-05, - "loss": 1.2148, - "step": 1089 - }, - { - "epoch": 0.8249763481551561, - "grad_norm": 2.64758038520813, - "learning_rate": 1.671229249670264e-05, - "loss": 1.2138, - "step": 1090 - }, - { - "epoch": 0.8257332071901609, - "grad_norm": 2.389108896255493, - "learning_rate": 1.6706339489361962e-05, - "loss": 1.2295, - "step": 1091 - }, - { - "epoch": 0.8264900662251655, - 
"grad_norm": 2.5130155086517334, - "learning_rate": 1.6700382159861705e-05, - "loss": 1.196, - "step": 1092 - }, - { - "epoch": 0.8272469252601703, - "grad_norm": 2.287849187850952, - "learning_rate": 1.6694420512041878e-05, - "loss": 1.2286, - "step": 1093 - }, - { - "epoch": 0.8280037842951751, - "grad_norm": 2.378422737121582, - "learning_rate": 1.6688454549745263e-05, - "loss": 1.2518, - "step": 1094 - }, - { - "epoch": 0.8287606433301797, - "grad_norm": 2.3797566890716553, - "learning_rate": 1.6682484276817433e-05, - "loss": 1.2228, - "step": 1095 - }, - { - "epoch": 0.8295175023651845, - "grad_norm": 2.276672124862671, - "learning_rate": 1.667650969710673e-05, - "loss": 1.2281, - "step": 1096 - }, - { - "epoch": 0.8302743614001892, - "grad_norm": 2.550900459289551, - "learning_rate": 1.6670530814464284e-05, - "loss": 1.2279, - "step": 1097 - }, - { - "epoch": 0.831031220435194, - "grad_norm": 2.5035128593444824, - "learning_rate": 1.6664547632743987e-05, - "loss": 1.1628, - "step": 1098 - }, - { - "epoch": 0.8317880794701987, - "grad_norm": 2.664567708969116, - "learning_rate": 1.6658560155802506e-05, - "loss": 1.187, - "step": 1099 - }, - { - "epoch": 0.8325449385052034, - "grad_norm": 2.5373306274414062, - "learning_rate": 1.665256838749928e-05, - "loss": 1.2422, - "step": 1100 - }, - { - "epoch": 0.8333017975402082, - "grad_norm": 2.7911324501037598, - "learning_rate": 1.664657233169651e-05, - "loss": 1.1982, - "step": 1101 - }, - { - "epoch": 0.8340586565752128, - "grad_norm": 2.663367509841919, - "learning_rate": 1.664057199225916e-05, - "loss": 1.2578, - "step": 1102 - }, - { - "epoch": 0.8348155156102176, - "grad_norm": 2.486424684524536, - "learning_rate": 1.663456737305496e-05, - "loss": 1.2106, - "step": 1103 - }, - { - "epoch": 0.8355723746452224, - "grad_norm": 2.503634214401245, - "learning_rate": 1.66285584779544e-05, - "loss": 1.2192, - "step": 1104 - }, - { - "epoch": 0.836329233680227, - "grad_norm": 2.679033041000366, - "learning_rate": 1.6622545310830712e-05, - "loss": 1.204, - "step": 1105 - }, - { - "epoch": 0.8370860927152318, - "grad_norm": 2.7814950942993164, - "learning_rate": 1.66165278755599e-05, - "loss": 1.2133, - "step": 1106 - }, - { - "epoch": 0.8378429517502365, - "grad_norm": 2.5719947814941406, - "learning_rate": 1.6610506176020707e-05, - "loss": 1.2457, - "step": 1107 - }, - { - "epoch": 0.8385998107852413, - "grad_norm": 3.662503957748413, - "learning_rate": 1.660448021609463e-05, - "loss": 1.2288, - "step": 1108 - }, - { - "epoch": 0.8393566698202459, - "grad_norm": 2.62904691696167, - "learning_rate": 1.659844999966591e-05, - "loss": 1.2202, - "step": 1109 - }, - { - "epoch": 0.8401135288552507, - "grad_norm": 2.6756417751312256, - "learning_rate": 1.659241553062154e-05, - "loss": 1.2438, - "step": 1110 - }, - { - "epoch": 0.8408703878902555, - "grad_norm": 2.762983798980713, - "learning_rate": 1.6586376812851233e-05, - "loss": 1.1499, - "step": 1111 - }, - { - "epoch": 0.8416272469252601, - "grad_norm": 2.4654974937438965, - "learning_rate": 1.6580333850247462e-05, - "loss": 1.2398, - "step": 1112 - }, - { - "epoch": 0.8423841059602649, - "grad_norm": 2.5800747871398926, - "learning_rate": 1.657428664670543e-05, - "loss": 1.2564, - "step": 1113 - }, - { - "epoch": 0.8431409649952696, - "grad_norm": 2.4179458618164062, - "learning_rate": 1.6568235206123073e-05, - "loss": 1.1874, - "step": 1114 - }, - { - "epoch": 0.8438978240302744, - "grad_norm": 2.4252541065216064, - "learning_rate": 1.6562179532401053e-05, - "loss": 1.2568, - "step": 1115 - 
}, - { - "epoch": 0.8446546830652791, - "grad_norm": 2.5447540283203125, - "learning_rate": 1.6556119629442764e-05, - "loss": 1.1884, - "step": 1116 - }, - { - "epoch": 0.8454115421002838, - "grad_norm": 2.5056309700012207, - "learning_rate": 1.655005550115433e-05, - "loss": 1.2665, - "step": 1117 - }, - { - "epoch": 0.8461684011352886, - "grad_norm": 2.7429358959198, - "learning_rate": 1.65439871514446e-05, - "loss": 1.1835, - "step": 1118 - }, - { - "epoch": 0.8469252601702932, - "grad_norm": 2.6835551261901855, - "learning_rate": 1.653791458422513e-05, - "loss": 1.2781, - "step": 1119 - }, - { - "epoch": 0.847682119205298, - "grad_norm": 2.8627474308013916, - "learning_rate": 1.653183780341021e-05, - "loss": 1.1931, - "step": 1120 - }, - { - "epoch": 0.8484389782403028, - "grad_norm": 2.758310556411743, - "learning_rate": 1.652575681291684e-05, - "loss": 1.1944, - "step": 1121 - }, - { - "epoch": 0.8491958372753075, - "grad_norm": 2.761715888977051, - "learning_rate": 1.6519671616664734e-05, - "loss": 1.2457, - "step": 1122 - }, - { - "epoch": 0.8499526963103122, - "grad_norm": 2.8214142322540283, - "learning_rate": 1.6513582218576315e-05, - "loss": 1.2203, - "step": 1123 - }, - { - "epoch": 0.8507095553453169, - "grad_norm": 2.6501047611236572, - "learning_rate": 1.6507488622576712e-05, - "loss": 1.2591, - "step": 1124 - }, - { - "epoch": 0.8514664143803217, - "grad_norm": 2.4939935207366943, - "learning_rate": 1.6501390832593777e-05, - "loss": 1.2205, - "step": 1125 - }, - { - "epoch": 0.8522232734153264, - "grad_norm": 2.5232126712799072, - "learning_rate": 1.6495288852558036e-05, - "loss": 1.2055, - "step": 1126 - }, - { - "epoch": 0.8529801324503311, - "grad_norm": 2.805695056915283, - "learning_rate": 1.6489182686402753e-05, - "loss": 1.2069, - "step": 1127 - }, - { - "epoch": 0.8537369914853359, - "grad_norm": 2.588597059249878, - "learning_rate": 1.6483072338063844e-05, - "loss": 1.1991, - "step": 1128 - }, - { - "epoch": 0.8544938505203405, - "grad_norm": 2.632336378097534, - "learning_rate": 1.6476957811479966e-05, - "loss": 1.2556, - "step": 1129 - }, - { - "epoch": 0.8552507095553453, - "grad_norm": 2.6084611415863037, - "learning_rate": 1.6470839110592445e-05, - "loss": 1.2257, - "step": 1130 - }, - { - "epoch": 0.8560075685903501, - "grad_norm": 2.4824182987213135, - "learning_rate": 1.6464716239345296e-05, - "loss": 1.1898, - "step": 1131 - }, - { - "epoch": 0.8567644276253548, - "grad_norm": 2.5742006301879883, - "learning_rate": 1.6458589201685235e-05, - "loss": 1.2024, - "step": 1132 - }, - { - "epoch": 0.8575212866603595, - "grad_norm": 2.2470591068267822, - "learning_rate": 1.6452458001561655e-05, - "loss": 1.2154, - "step": 1133 - }, - { - "epoch": 0.8582781456953642, - "grad_norm": 2.5146355628967285, - "learning_rate": 1.6446322642926636e-05, - "loss": 1.2202, - "step": 1134 - }, - { - "epoch": 0.859035004730369, - "grad_norm": 2.580735683441162, - "learning_rate": 1.644018312973493e-05, - "loss": 1.1595, - "step": 1135 - }, - { - "epoch": 0.8597918637653738, - "grad_norm": 2.558544635772705, - "learning_rate": 1.6434039465943984e-05, - "loss": 1.2048, - "step": 1136 - }, - { - "epoch": 0.8605487228003784, - "grad_norm": 2.5136754512786865, - "learning_rate": 1.64278916555139e-05, - "loss": 1.2003, - "step": 1137 - }, - { - "epoch": 0.8613055818353832, - "grad_norm": 2.7524209022521973, - "learning_rate": 1.6421739702407468e-05, - "loss": 1.1862, - "step": 1138 - }, - { - "epoch": 0.8620624408703879, - "grad_norm": 2.519251585006714, - "learning_rate": 
1.6415583610590144e-05, - "loss": 1.2471, - "step": 1139 - }, - { - "epoch": 0.8628192999053926, - "grad_norm": 2.7237823009490967, - "learning_rate": 1.6409423384030046e-05, - "loss": 1.1878, - "step": 1140 - }, - { - "epoch": 0.8635761589403973, - "grad_norm": 2.595668315887451, - "learning_rate": 1.6403259026697967e-05, - "loss": 1.2164, - "step": 1141 - }, - { - "epoch": 0.8643330179754021, - "grad_norm": 2.6703858375549316, - "learning_rate": 1.6397090542567356e-05, - "loss": 1.1944, - "step": 1142 - }, - { - "epoch": 0.8650898770104068, - "grad_norm": 2.558354139328003, - "learning_rate": 1.639091793561432e-05, - "loss": 1.2423, - "step": 1143 - }, - { - "epoch": 0.8658467360454115, - "grad_norm": 2.445343494415283, - "learning_rate": 1.6384741209817638e-05, - "loss": 1.1989, - "step": 1144 - }, - { - "epoch": 0.8666035950804163, - "grad_norm": 2.3659980297088623, - "learning_rate": 1.6378560369158724e-05, - "loss": 1.1969, - "step": 1145 - }, - { - "epoch": 0.867360454115421, - "grad_norm": 2.6195647716522217, - "learning_rate": 1.6372375417621654e-05, - "loss": 1.2012, - "step": 1146 - }, - { - "epoch": 0.8681173131504257, - "grad_norm": 2.528627395629883, - "learning_rate": 1.6366186359193155e-05, - "loss": 1.2365, - "step": 1147 - }, - { - "epoch": 0.8688741721854305, - "grad_norm": 2.360337734222412, - "learning_rate": 1.6359993197862604e-05, - "loss": 1.2192, - "step": 1148 - }, - { - "epoch": 0.8696310312204352, - "grad_norm": 2.3621413707733154, - "learning_rate": 1.635379593762201e-05, - "loss": 1.2015, - "step": 1149 - }, - { - "epoch": 0.8703878902554399, - "grad_norm": 2.5273406505584717, - "learning_rate": 1.6347594582466038e-05, - "loss": 1.187, - "step": 1150 - }, - { - "epoch": 0.8711447492904446, - "grad_norm": 2.8172874450683594, - "learning_rate": 1.6341389136391985e-05, - "loss": 1.2271, - "step": 1151 - }, - { - "epoch": 0.8719016083254494, - "grad_norm": 2.3418102264404297, - "learning_rate": 1.6335179603399788e-05, - "loss": 1.2358, - "step": 1152 - }, - { - "epoch": 0.8726584673604542, - "grad_norm": 2.415493965148926, - "learning_rate": 1.632896598749202e-05, - "loss": 1.2717, - "step": 1153 - }, - { - "epoch": 0.8734153263954588, - "grad_norm": 2.3262200355529785, - "learning_rate": 1.6322748292673875e-05, - "loss": 1.2198, - "step": 1154 - }, - { - "epoch": 0.8741721854304636, - "grad_norm": 2.8730580806732178, - "learning_rate": 1.6316526522953195e-05, - "loss": 1.183, - "step": 1155 - }, - { - "epoch": 0.8749290444654683, - "grad_norm": 2.432713508605957, - "learning_rate": 1.631030068234043e-05, - "loss": 1.2093, - "step": 1156 - }, - { - "epoch": 0.875685903500473, - "grad_norm": 2.6007068157196045, - "learning_rate": 1.630407077484866e-05, - "loss": 1.2506, - "step": 1157 - }, - { - "epoch": 0.8764427625354778, - "grad_norm": 2.785717487335205, - "learning_rate": 1.6297836804493598e-05, - "loss": 1.2073, - "step": 1158 - }, - { - "epoch": 0.8771996215704825, - "grad_norm": 2.498161792755127, - "learning_rate": 1.629159877529356e-05, - "loss": 1.2297, - "step": 1159 - }, - { - "epoch": 0.8779564806054873, - "grad_norm": 2.6516387462615967, - "learning_rate": 1.628535669126948e-05, - "loss": 1.2242, - "step": 1160 - }, - { - "epoch": 0.8787133396404919, - "grad_norm": 2.164231300354004, - "learning_rate": 1.627911055644492e-05, - "loss": 1.242, - "step": 1161 - }, - { - "epoch": 0.8794701986754967, - "grad_norm": 2.1503818035125732, - "learning_rate": 1.6272860374846037e-05, - "loss": 1.2187, - "step": 1162 - }, - { - "epoch": 0.8802270577105015, - 
"grad_norm": 1.9819633960723877, - "learning_rate": 1.6266606150501608e-05, - "loss": 1.2044, - "step": 1163 - }, - { - "epoch": 0.8809839167455061, - "grad_norm": 2.251472234725952, - "learning_rate": 1.6260347887443e-05, - "loss": 1.2262, - "step": 1164 - }, - { - "epoch": 0.8817407757805109, - "grad_norm": 2.2765519618988037, - "learning_rate": 1.625408558970421e-05, - "loss": 1.185, - "step": 1165 - }, - { - "epoch": 0.8824976348155156, - "grad_norm": 2.4048166275024414, - "learning_rate": 1.6247819261321803e-05, - "loss": 1.1973, - "step": 1166 - }, - { - "epoch": 0.8832544938505204, - "grad_norm": 2.234778881072998, - "learning_rate": 1.624154890633497e-05, - "loss": 1.1795, - "step": 1167 - }, - { - "epoch": 0.8840113528855251, - "grad_norm": 2.2173893451690674, - "learning_rate": 1.623527452878548e-05, - "loss": 1.1897, - "step": 1168 - }, - { - "epoch": 0.8847682119205298, - "grad_norm": 2.3917415142059326, - "learning_rate": 1.6228996132717702e-05, - "loss": 1.2425, - "step": 1169 - }, - { - "epoch": 0.8855250709555346, - "grad_norm": 2.254404306411743, - "learning_rate": 1.62227137221786e-05, - "loss": 1.2495, - "step": 1170 - }, - { - "epoch": 0.8862819299905392, - "grad_norm": 2.1990530490875244, - "learning_rate": 1.6216427301217713e-05, - "loss": 1.1757, - "step": 1171 - }, - { - "epoch": 0.887038789025544, - "grad_norm": 2.3781630992889404, - "learning_rate": 1.6210136873887176e-05, - "loss": 1.2387, - "step": 1172 - }, - { - "epoch": 0.8877956480605487, - "grad_norm": 2.1774098873138428, - "learning_rate": 1.6203842444241703e-05, - "loss": 1.1937, - "step": 1173 - }, - { - "epoch": 0.8885525070955534, - "grad_norm": 2.3653695583343506, - "learning_rate": 1.619754401633858e-05, - "loss": 1.2115, - "step": 1174 - }, - { - "epoch": 0.8893093661305582, - "grad_norm": 2.3032443523406982, - "learning_rate": 1.619124159423769e-05, - "loss": 1.1802, - "step": 1175 - }, - { - "epoch": 0.8900662251655629, - "grad_norm": 2.2687666416168213, - "learning_rate": 1.618493518200147e-05, - "loss": 1.1868, - "step": 1176 - }, - { - "epoch": 0.8908230842005677, - "grad_norm": 2.3060355186462402, - "learning_rate": 1.6178624783694937e-05, - "loss": 1.1933, - "step": 1177 - }, - { - "epoch": 0.8915799432355723, - "grad_norm": 3.4064903259277344, - "learning_rate": 1.6172310403385677e-05, - "loss": 1.234, - "step": 1178 - }, - { - "epoch": 0.8923368022705771, - "grad_norm": 2.568434715270996, - "learning_rate": 1.616599204514385e-05, - "loss": 1.2115, - "step": 1179 - }, - { - "epoch": 0.8930936613055819, - "grad_norm": 2.2627182006835938, - "learning_rate": 1.6159669713042166e-05, - "loss": 1.2229, - "step": 1180 - }, - { - "epoch": 0.8938505203405865, - "grad_norm": 2.2551677227020264, - "learning_rate": 1.615334341115591e-05, - "loss": 1.2481, - "step": 1181 - }, - { - "epoch": 0.8946073793755913, - "grad_norm": 2.4630017280578613, - "learning_rate": 1.6147013143562915e-05, - "loss": 1.1769, - "step": 1182 - }, - { - "epoch": 0.895364238410596, - "grad_norm": 2.2972466945648193, - "learning_rate": 1.6140678914343575e-05, - "loss": 1.2028, - "step": 1183 - }, - { - "epoch": 0.8961210974456008, - "grad_norm": 2.343468189239502, - "learning_rate": 1.6134340727580843e-05, - "loss": 1.2356, - "step": 1184 - }, - { - "epoch": 0.8968779564806055, - "grad_norm": 2.2180895805358887, - "learning_rate": 1.6127998587360208e-05, - "loss": 1.2082, - "step": 1185 - }, - { - "epoch": 0.8976348155156102, - "grad_norm": 2.121718406677246, - "learning_rate": 1.6121652497769727e-05, - "loss": 1.2052, - "step": 
1186 - }, - { - "epoch": 0.898391674550615, - "grad_norm": 2.2796201705932617, - "learning_rate": 1.6115302462899982e-05, - "loss": 1.2056, - "step": 1187 - }, - { - "epoch": 0.8991485335856196, - "grad_norm": 2.1909053325653076, - "learning_rate": 1.6108948486844118e-05, - "loss": 1.1556, - "step": 1188 - }, - { - "epoch": 0.8999053926206244, - "grad_norm": 2.4068331718444824, - "learning_rate": 1.610259057369781e-05, - "loss": 1.2258, - "step": 1189 - }, - { - "epoch": 0.9006622516556292, - "grad_norm": 2.168159246444702, - "learning_rate": 1.6096228727559265e-05, - "loss": 1.1805, - "step": 1190 - }, - { - "epoch": 0.9014191106906339, - "grad_norm": 2.129645586013794, - "learning_rate": 1.608986295252924e-05, - "loss": 1.1725, - "step": 1191 - }, - { - "epoch": 0.9021759697256386, - "grad_norm": 2.3025479316711426, - "learning_rate": 1.608349325271101e-05, - "loss": 1.1873, - "step": 1192 - }, - { - "epoch": 0.9029328287606433, - "grad_norm": 2.3402655124664307, - "learning_rate": 1.607711963221039e-05, - "loss": 1.2049, - "step": 1193 - }, - { - "epoch": 0.9036896877956481, - "grad_norm": 2.844715118408203, - "learning_rate": 1.6070742095135722e-05, - "loss": 1.1654, - "step": 1194 - }, - { - "epoch": 0.9044465468306528, - "grad_norm": 2.337291717529297, - "learning_rate": 1.6064360645597862e-05, - "loss": 1.2119, - "step": 1195 - }, - { - "epoch": 0.9052034058656575, - "grad_norm": 2.2666563987731934, - "learning_rate": 1.60579752877102e-05, - "loss": 1.1662, - "step": 1196 - }, - { - "epoch": 0.9059602649006623, - "grad_norm": 2.2546000480651855, - "learning_rate": 1.6051586025588634e-05, - "loss": 1.1612, - "step": 1197 - }, - { - "epoch": 0.906717123935667, - "grad_norm": 2.4789252281188965, - "learning_rate": 1.6045192863351594e-05, - "loss": 1.159, - "step": 1198 - }, - { - "epoch": 0.9074739829706717, - "grad_norm": 2.5757458209991455, - "learning_rate": 1.6038795805120005e-05, - "loss": 1.1359, - "step": 1199 - }, - { - "epoch": 0.9082308420056765, - "grad_norm": 2.5025620460510254, - "learning_rate": 1.603239485501732e-05, - "loss": 1.2513, - "step": 1200 - }, - { - "epoch": 0.9089877010406812, - "grad_norm": 2.6904783248901367, - "learning_rate": 1.6025990017169495e-05, - "loss": 1.2275, - "step": 1201 - }, - { - "epoch": 0.9097445600756859, - "grad_norm": 2.121021270751953, - "learning_rate": 1.6019581295704985e-05, - "loss": 1.2138, - "step": 1202 - }, - { - "epoch": 0.9105014191106906, - "grad_norm": 2.2942512035369873, - "learning_rate": 1.601316869475476e-05, - "loss": 1.207, - "step": 1203 - }, - { - "epoch": 0.9112582781456954, - "grad_norm": 2.4668707847595215, - "learning_rate": 1.6006752218452283e-05, - "loss": 1.2422, - "step": 1204 - }, - { - "epoch": 0.9120151371807, - "grad_norm": 2.536863088607788, - "learning_rate": 1.600033187093351e-05, - "loss": 1.2256, - "step": 1205 - }, - { - "epoch": 0.9127719962157048, - "grad_norm": 3.008856773376465, - "learning_rate": 1.599390765633691e-05, - "loss": 1.2129, - "step": 1206 - }, - { - "epoch": 0.9135288552507096, - "grad_norm": 2.292177438735962, - "learning_rate": 1.5987479578803425e-05, - "loss": 1.2237, - "step": 1207 - }, - { - "epoch": 0.9142857142857143, - "grad_norm": 2.4802656173706055, - "learning_rate": 1.59810476424765e-05, - "loss": 1.2307, - "step": 1208 - }, - { - "epoch": 0.915042573320719, - "grad_norm": 2.225219964981079, - "learning_rate": 1.5974611851502064e-05, - "loss": 1.1845, - "step": 1209 - }, - { - "epoch": 0.9157994323557237, - "grad_norm": 2.584470272064209, - "learning_rate": 
1.5968172210028525e-05, - "loss": 1.1756, - "step": 1210 - }, - { - "epoch": 0.9165562913907285, - "grad_norm": 2.3518307209014893, - "learning_rate": 1.596172872220679e-05, - "loss": 1.1788, - "step": 1211 - }, - { - "epoch": 0.9173131504257332, - "grad_norm": 2.3497278690338135, - "learning_rate": 1.595528139219021e-05, - "loss": 1.2084, - "step": 1212 - }, - { - "epoch": 0.9180700094607379, - "grad_norm": 2.117664337158203, - "learning_rate": 1.594883022413466e-05, - "loss": 1.1765, - "step": 1213 - }, - { - "epoch": 0.9188268684957427, - "grad_norm": 2.1322619915008545, - "learning_rate": 1.594237522219845e-05, - "loss": 1.1835, - "step": 1214 - }, - { - "epoch": 0.9195837275307474, - "grad_norm": 2.0177836418151855, - "learning_rate": 1.5935916390542377e-05, - "loss": 1.178, - "step": 1215 - }, - { - "epoch": 0.9203405865657521, - "grad_norm": 2.2327425479888916, - "learning_rate": 1.5929453733329713e-05, - "loss": 1.1916, - "step": 1216 - }, - { - "epoch": 0.9210974456007569, - "grad_norm": 2.168905258178711, - "learning_rate": 1.592298725472618e-05, - "loss": 1.2139, - "step": 1217 - }, - { - "epoch": 0.9218543046357616, - "grad_norm": 2.275158166885376, - "learning_rate": 1.591651695889998e-05, - "loss": 1.2014, - "step": 1218 - }, - { - "epoch": 0.9226111636707663, - "grad_norm": 2.153704881668091, - "learning_rate": 1.5910042850021754e-05, - "loss": 1.2219, - "step": 1219 - }, - { - "epoch": 0.923368022705771, - "grad_norm": 2.161616802215576, - "learning_rate": 1.5903564932264624e-05, - "loss": 1.2452, - "step": 1220 - }, - { - "epoch": 0.9241248817407758, - "grad_norm": 2.1606664657592773, - "learning_rate": 1.589708320980416e-05, - "loss": 1.1448, - "step": 1221 - }, - { - "epoch": 0.9248817407757806, - "grad_norm": 2.040039300918579, - "learning_rate": 1.589059768681837e-05, - "loss": 1.235, - "step": 1222 - }, - { - "epoch": 0.9256385998107852, - "grad_norm": 2.2927193641662598, - "learning_rate": 1.5884108367487732e-05, - "loss": 1.19, - "step": 1223 - }, - { - "epoch": 0.92639545884579, - "grad_norm": 2.2096221446990967, - "learning_rate": 1.587761525599516e-05, - "loss": 1.2349, - "step": 1224 - }, - { - "epoch": 0.9271523178807947, - "grad_norm": 2.1982614994049072, - "learning_rate": 1.5871118356526017e-05, - "loss": 1.206, - "step": 1225 - }, - { - "epoch": 0.9279091769157994, - "grad_norm": 2.2477710247039795, - "learning_rate": 1.5864617673268096e-05, - "loss": 1.2044, - "step": 1226 - }, - { - "epoch": 0.9286660359508042, - "grad_norm": 2.126891851425171, - "learning_rate": 1.5858113210411646e-05, - "loss": 1.1685, - "step": 1227 - }, - { - "epoch": 0.9294228949858089, - "grad_norm": 2.6382102966308594, - "learning_rate": 1.585160497214935e-05, - "loss": 1.2247, - "step": 1228 - }, - { - "epoch": 0.9301797540208137, - "grad_norm": 2.1951191425323486, - "learning_rate": 1.5845092962676306e-05, - "loss": 1.1517, - "step": 1229 - }, - { - "epoch": 0.9309366130558183, - "grad_norm": 2.299997091293335, - "learning_rate": 1.5838577186190064e-05, - "loss": 1.2327, - "step": 1230 - }, - { - "epoch": 0.9316934720908231, - "grad_norm": 2.69441556930542, - "learning_rate": 1.5832057646890594e-05, - "loss": 1.1622, - "step": 1231 - }, - { - "epoch": 0.9324503311258279, - "grad_norm": 2.3268439769744873, - "learning_rate": 1.582553434898029e-05, - "loss": 1.2181, - "step": 1232 - }, - { - "epoch": 0.9332071901608325, - "grad_norm": 2.1350252628326416, - "learning_rate": 1.5819007296663974e-05, - "loss": 1.1818, - "step": 1233 - }, - { - "epoch": 0.9339640491958373, - 
"grad_norm": 2.0754928588867188, - "learning_rate": 1.5812476494148876e-05, - "loss": 1.1847, - "step": 1234 - }, - { - "epoch": 0.934720908230842, - "grad_norm": 2.48238205909729, - "learning_rate": 1.5805941945644658e-05, - "loss": 1.222, - "step": 1235 - }, - { - "epoch": 0.9354777672658467, - "grad_norm": 2.202993154525757, - "learning_rate": 1.579940365536339e-05, - "loss": 1.2381, - "step": 1236 - }, - { - "epoch": 0.9362346263008514, - "grad_norm": 2.424055337905884, - "learning_rate": 1.5792861627519554e-05, - "loss": 1.2035, - "step": 1237 - }, - { - "epoch": 0.9369914853358562, - "grad_norm": 2.270042896270752, - "learning_rate": 1.578631586633004e-05, - "loss": 1.2268, - "step": 1238 - }, - { - "epoch": 0.937748344370861, - "grad_norm": 2.38864803314209, - "learning_rate": 1.5779766376014146e-05, - "loss": 1.2202, - "step": 1239 - }, - { - "epoch": 0.9385052034058656, - "grad_norm": 2.137854814529419, - "learning_rate": 1.5773213160793574e-05, - "loss": 1.2246, - "step": 1240 - }, - { - "epoch": 0.9392620624408704, - "grad_norm": 2.5035834312438965, - "learning_rate": 1.5766656224892424e-05, - "loss": 1.2246, - "step": 1241 - }, - { - "epoch": 0.9400189214758751, - "grad_norm": 2.498552083969116, - "learning_rate": 1.5760095572537207e-05, - "loss": 1.2037, - "step": 1242 - }, - { - "epoch": 0.9407757805108798, - "grad_norm": 2.0278542041778564, - "learning_rate": 1.5753531207956806e-05, - "loss": 1.2197, - "step": 1243 - }, - { - "epoch": 0.9415326395458846, - "grad_norm": 2.1201868057250977, - "learning_rate": 1.5746963135382522e-05, - "loss": 1.1557, - "step": 1244 - }, - { - "epoch": 0.9422894985808893, - "grad_norm": 2.480867385864258, - "learning_rate": 1.574039135904802e-05, - "loss": 1.2006, - "step": 1245 - }, - { - "epoch": 0.9430463576158941, - "grad_norm": 2.257807970046997, - "learning_rate": 1.573381588318938e-05, - "loss": 1.2235, - "step": 1246 - }, - { - "epoch": 0.9438032166508987, - "grad_norm": 2.2047722339630127, - "learning_rate": 1.5727236712045053e-05, - "loss": 1.1904, - "step": 1247 - }, - { - "epoch": 0.9445600756859035, - "grad_norm": 2.2862167358398438, - "learning_rate": 1.5720653849855862e-05, - "loss": 1.2388, - "step": 1248 - }, - { - "epoch": 0.9453169347209083, - "grad_norm": 2.2071452140808105, - "learning_rate": 1.571406730086503e-05, - "loss": 1.1967, - "step": 1249 - }, - { - "epoch": 0.9460737937559129, - "grad_norm": 2.1805355548858643, - "learning_rate": 1.5707477069318143e-05, - "loss": 1.1927, - "step": 1250 - }, - { - "epoch": 0.9468306527909177, - "grad_norm": 2.156611204147339, - "learning_rate": 1.5700883159463162e-05, - "loss": 1.2216, - "step": 1251 - }, - { - "epoch": 0.9475875118259224, - "grad_norm": 2.2290961742401123, - "learning_rate": 1.5694285575550416e-05, - "loss": 1.2116, - "step": 1252 - }, - { - "epoch": 0.9483443708609272, - "grad_norm": 2.0691416263580322, - "learning_rate": 1.568768432183262e-05, - "loss": 1.2077, - "step": 1253 - }, - { - "epoch": 0.9491012298959319, - "grad_norm": 2.2860946655273438, - "learning_rate": 1.568107940256483e-05, - "loss": 1.2392, - "step": 1254 - }, - { - "epoch": 0.9498580889309366, - "grad_norm": 2.3357367515563965, - "learning_rate": 1.567447082200448e-05, - "loss": 1.2469, - "step": 1255 - }, - { - "epoch": 0.9506149479659414, - "grad_norm": 2.224269151687622, - "learning_rate": 1.566785858441136e-05, - "loss": 1.2553, - "step": 1256 - }, - { - "epoch": 0.951371807000946, - "grad_norm": 2.274747133255005, - "learning_rate": 1.566124269404762e-05, - "loss": 1.1486, - "step": 1257 
- }, - { - "epoch": 0.9521286660359508, - "grad_norm": 2.205291271209717, - "learning_rate": 1.5654623155177758e-05, - "loss": 1.228, - "step": 1258 - }, - { - "epoch": 0.9528855250709556, - "grad_norm": 2.059138774871826, - "learning_rate": 1.564799997206863e-05, - "loss": 1.152, - "step": 1259 - }, - { - "epoch": 0.9536423841059603, - "grad_norm": 2.382854700088501, - "learning_rate": 1.564137314898944e-05, - "loss": 1.2262, - "step": 1260 - }, - { - "epoch": 0.954399243140965, - "grad_norm": 2.1926519870758057, - "learning_rate": 1.563474269021174e-05, - "loss": 1.1439, - "step": 1261 - }, - { - "epoch": 0.9551561021759697, - "grad_norm": 2.247835159301758, - "learning_rate": 1.5628108600009414e-05, - "loss": 1.2191, - "step": 1262 - }, - { - "epoch": 0.9559129612109745, - "grad_norm": 2.6202445030212402, - "learning_rate": 1.5621470882658696e-05, - "loss": 1.1955, - "step": 1263 - }, - { - "epoch": 0.9566698202459792, - "grad_norm": 2.1109254360198975, - "learning_rate": 1.5614829542438162e-05, - "loss": 1.2208, - "step": 1264 - }, - { - "epoch": 0.9574266792809839, - "grad_norm": 2.0421035289764404, - "learning_rate": 1.5608184583628723e-05, - "loss": 1.2597, - "step": 1265 - }, - { - "epoch": 0.9581835383159887, - "grad_norm": 2.3527796268463135, - "learning_rate": 1.5601536010513608e-05, - "loss": 1.2437, - "step": 1266 - }, - { - "epoch": 0.9589403973509933, - "grad_norm": 2.39426851272583, - "learning_rate": 1.559488382737839e-05, - "loss": 1.2418, - "step": 1267 - }, - { - "epoch": 0.9596972563859981, - "grad_norm": 2.546283483505249, - "learning_rate": 1.558822803851097e-05, - "loss": 1.2295, - "step": 1268 - }, - { - "epoch": 0.9604541154210028, - "grad_norm": 2.275153160095215, - "learning_rate": 1.558156864820156e-05, - "loss": 1.1896, - "step": 1269 - }, - { - "epoch": 0.9612109744560076, - "grad_norm": 2.1879630088806152, - "learning_rate": 1.5574905660742707e-05, - "loss": 1.1766, - "step": 1270 - }, - { - "epoch": 0.9619678334910123, - "grad_norm": 2.3438518047332764, - "learning_rate": 1.556823908042927e-05, - "loss": 1.1828, - "step": 1271 - }, - { - "epoch": 0.962724692526017, - "grad_norm": 2.673069477081299, - "learning_rate": 1.5561568911558422e-05, - "loss": 1.1909, - "step": 1272 - }, - { - "epoch": 0.9634815515610218, - "grad_norm": 2.3552541732788086, - "learning_rate": 1.5554895158429654e-05, - "loss": 1.2246, - "step": 1273 - }, - { - "epoch": 0.9642384105960264, - "grad_norm": 2.1169943809509277, - "learning_rate": 1.5548217825344765e-05, - "loss": 1.2018, - "step": 1274 - }, - { - "epoch": 0.9649952696310312, - "grad_norm": 2.1841084957122803, - "learning_rate": 1.5541536916607863e-05, - "loss": 1.1496, - "step": 1275 - }, - { - "epoch": 0.965752128666036, - "grad_norm": 2.1429550647735596, - "learning_rate": 1.553485243652536e-05, - "loss": 1.1537, - "step": 1276 - }, - { - "epoch": 0.9665089877010407, - "grad_norm": 2.6029670238494873, - "learning_rate": 1.5528164389405972e-05, - "loss": 1.2348, - "step": 1277 - }, - { - "epoch": 0.9672658467360454, - "grad_norm": 2.105222463607788, - "learning_rate": 1.5521472779560705e-05, - "loss": 1.217, - "step": 1278 - }, - { - "epoch": 0.9680227057710501, - "grad_norm": 2.1541764736175537, - "learning_rate": 1.5514777611302875e-05, - "loss": 1.1844, - "step": 1279 - }, - { - "epoch": 0.9687795648060549, - "grad_norm": 2.0249156951904297, - "learning_rate": 1.5508078888948086e-05, - "loss": 1.2191, - "step": 1280 - }, - { - "epoch": 0.9695364238410596, - "grad_norm": 2.187110185623169, - "learning_rate": 
1.550137661681423e-05, - "loss": 1.1811, - "step": 1281 - }, - { - "epoch": 0.9702932828760643, - "grad_norm": 2.27626371383667, - "learning_rate": 1.5494670799221485e-05, - "loss": 1.186, - "step": 1282 - }, - { - "epoch": 0.9710501419110691, - "grad_norm": 2.0355005264282227, - "learning_rate": 1.5487961440492327e-05, - "loss": 1.2338, - "step": 1283 - }, - { - "epoch": 0.9718070009460738, - "grad_norm": 2.126351833343506, - "learning_rate": 1.54812485449515e-05, - "loss": 1.2129, - "step": 1284 - }, - { - "epoch": 0.9725638599810785, - "grad_norm": 2.150451421737671, - "learning_rate": 1.5474532116926037e-05, - "loss": 1.1812, - "step": 1285 - }, - { - "epoch": 0.9733207190160833, - "grad_norm": 2.0796091556549072, - "learning_rate": 1.5467812160745245e-05, - "loss": 1.2273, - "step": 1286 - }, - { - "epoch": 0.974077578051088, - "grad_norm": 2.349214792251587, - "learning_rate": 1.5461088680740702e-05, - "loss": 1.2286, - "step": 1287 - }, - { - "epoch": 0.9748344370860927, - "grad_norm": 2.1848902702331543, - "learning_rate": 1.545436168124627e-05, - "loss": 1.2239, - "step": 1288 - }, - { - "epoch": 0.9755912961210974, - "grad_norm": 2.261702299118042, - "learning_rate": 1.544763116659806e-05, - "loss": 1.202, - "step": 1289 - }, - { - "epoch": 0.9763481551561022, - "grad_norm": 2.2427971363067627, - "learning_rate": 1.5440897141134464e-05, - "loss": 1.2133, - "step": 1290 - }, - { - "epoch": 0.977105014191107, - "grad_norm": 2.076875686645508, - "learning_rate": 1.5434159609196128e-05, - "loss": 1.2056, - "step": 1291 - }, - { - "epoch": 0.9778618732261116, - "grad_norm": 2.26599383354187, - "learning_rate": 1.542741857512597e-05, - "loss": 1.195, - "step": 1292 - }, - { - "epoch": 0.9786187322611164, - "grad_norm": 2.262747049331665, - "learning_rate": 1.5420674043269152e-05, - "loss": 1.2286, - "step": 1293 - }, - { - "epoch": 0.9793755912961211, - "grad_norm": 2.1384646892547607, - "learning_rate": 1.5413926017973097e-05, - "loss": 1.1843, - "step": 1294 - }, - { - "epoch": 0.9801324503311258, - "grad_norm": 2.3019633293151855, - "learning_rate": 1.540717450358748e-05, - "loss": 1.2474, - "step": 1295 - }, - { - "epoch": 0.9808893093661306, - "grad_norm": 2.6212801933288574, - "learning_rate": 1.5400419504464222e-05, - "loss": 1.2305, - "step": 1296 - }, - { - "epoch": 0.9816461684011353, - "grad_norm": 2.415092706680298, - "learning_rate": 1.5393661024957495e-05, - "loss": 1.2394, - "step": 1297 - }, - { - "epoch": 0.98240302743614, - "grad_norm": 2.392845392227173, - "learning_rate": 1.5386899069423712e-05, - "loss": 1.1971, - "step": 1298 - }, - { - "epoch": 0.9831598864711447, - "grad_norm": 2.291163206100464, - "learning_rate": 1.5380133642221525e-05, - "loss": 1.2004, - "step": 1299 - }, - { - "epoch": 0.9839167455061495, - "grad_norm": 2.191312313079834, - "learning_rate": 1.5373364747711825e-05, - "loss": 1.1677, - "step": 1300 - }, - { - "epoch": 0.9846736045411542, - "grad_norm": 2.2666783332824707, - "learning_rate": 1.536659239025774e-05, - "loss": 1.1656, - "step": 1301 - }, - { - "epoch": 0.9854304635761589, - "grad_norm": 2.578672409057617, - "learning_rate": 1.5359816574224626e-05, - "loss": 1.2021, - "step": 1302 - }, - { - "epoch": 0.9861873226111637, - "grad_norm": 2.1345741748809814, - "learning_rate": 1.5353037303980075e-05, - "loss": 1.2277, - "step": 1303 - }, - { - "epoch": 0.9869441816461684, - "grad_norm": 2.1685898303985596, - "learning_rate": 1.5346254583893895e-05, - "loss": 1.206, - "step": 1304 - }, - { - "epoch": 0.9877010406811731, - "grad_norm": 
2.3150031566619873, - "learning_rate": 1.533946841833813e-05, - "loss": 1.1747, - "step": 1305 - }, - { - "epoch": 0.9884578997161778, - "grad_norm": 2.3677496910095215, - "learning_rate": 1.5332678811687034e-05, - "loss": 1.2502, - "step": 1306 - }, - { - "epoch": 0.9892147587511826, - "grad_norm": 2.0479371547698975, - "learning_rate": 1.5325885768317085e-05, - "loss": 1.129, - "step": 1307 - }, - { - "epoch": 0.9899716177861874, - "grad_norm": 2.272096633911133, - "learning_rate": 1.531908929260698e-05, - "loss": 1.1515, - "step": 1308 - }, - { - "epoch": 0.990728476821192, - "grad_norm": 2.233167886734009, - "learning_rate": 1.5312289388937613e-05, - "loss": 1.1576, - "step": 1309 - }, - { - "epoch": 0.9914853358561968, - "grad_norm": 2.329028606414795, - "learning_rate": 1.530548606169211e-05, - "loss": 1.2331, - "step": 1310 - }, - { - "epoch": 0.9922421948912015, - "grad_norm": 2.3802735805511475, - "learning_rate": 1.5298679315255786e-05, - "loss": 1.1576, - "step": 1311 - }, - { - "epoch": 0.9929990539262062, - "grad_norm": 2.45041561126709, - "learning_rate": 1.5291869154016167e-05, - "loss": 1.2241, - "step": 1312 - }, - { - "epoch": 0.993755912961211, - "grad_norm": 2.528601884841919, - "learning_rate": 1.5285055582362975e-05, - "loss": 1.2257, - "step": 1313 - }, - { - "epoch": 0.9945127719962157, - "grad_norm": 2.2096829414367676, - "learning_rate": 1.5278238604688143e-05, - "loss": 1.1959, - "step": 1314 - }, - { - "epoch": 0.9952696310312205, - "grad_norm": 2.218921184539795, - "learning_rate": 1.5271418225385784e-05, - "loss": 1.1444, - "step": 1315 - }, - { - "epoch": 0.9960264900662251, - "grad_norm": 2.563999891281128, - "learning_rate": 1.526459444885221e-05, - "loss": 1.192, - "step": 1316 - }, - { - "epoch": 0.9967833491012299, - "grad_norm": 2.6427245140075684, - "learning_rate": 1.5257767279485934e-05, - "loss": 1.1575, - "step": 1317 - }, - { - "epoch": 0.9975402081362347, - "grad_norm": 2.0441269874572754, - "learning_rate": 1.5250936721687628e-05, - "loss": 1.1636, - "step": 1318 - }, - { - "epoch": 0.9982970671712393, - "grad_norm": 2.560488700866699, - "learning_rate": 1.5244102779860178e-05, - "loss": 1.2198, - "step": 1319 - }, - { - "epoch": 0.9990539262062441, - "grad_norm": 2.5017917156219482, - "learning_rate": 1.5237265458408637e-05, - "loss": 1.2215, - "step": 1320 - }, - { - "epoch": 0.9998107852412488, - "grad_norm": 2.3458917140960693, - "learning_rate": 1.5230424761740234e-05, - "loss": 1.1645, - "step": 1321 - }, - { - "epoch": 1.0005676442762534, - "grad_norm": 2.2212741374969482, - "learning_rate": 1.5223580694264382e-05, - "loss": 1.2054, - "step": 1322 - }, - { - "epoch": 1.0013245033112583, - "grad_norm": 2.394789695739746, - "learning_rate": 1.5216733260392658e-05, - "loss": 1.1085, - "step": 1323 - }, - { - "epoch": 1.002081362346263, - "grad_norm": 2.135875701904297, - "learning_rate": 1.5209882464538817e-05, - "loss": 1.1754, - "step": 1324 - }, - { - "epoch": 1.0028382213812677, - "grad_norm": 2.4602649211883545, - "learning_rate": 1.5203028311118778e-05, - "loss": 1.1202, - "step": 1325 - }, - { - "epoch": 1.0035950804162725, - "grad_norm": 2.5135326385498047, - "learning_rate": 1.5196170804550618e-05, - "loss": 1.147, - "step": 1326 - }, - { - "epoch": 1.0043519394512772, - "grad_norm": 2.3376166820526123, - "learning_rate": 1.518930994925459e-05, - "loss": 1.1715, - "step": 1327 - }, - { - "epoch": 1.0051087984862819, - "grad_norm": 2.2913684844970703, - "learning_rate": 1.518244574965309e-05, - "loss": 1.2037, - "step": 1328 - }, - { 
- "epoch": 1.0058656575212868, - "grad_norm": 2.34970760345459, - "learning_rate": 1.5175578210170678e-05, - "loss": 1.155, - "step": 1329 - }, - { - "epoch": 1.0066225165562914, - "grad_norm": 2.345945119857788, - "learning_rate": 1.5168707335234067e-05, - "loss": 1.1392, - "step": 1330 - }, - { - "epoch": 1.007379375591296, - "grad_norm": 2.2599105834960938, - "learning_rate": 1.5161833129272117e-05, - "loss": 1.1402, - "step": 1331 - }, - { - "epoch": 1.0081362346263008, - "grad_norm": 2.5355637073516846, - "learning_rate": 1.5154955596715836e-05, - "loss": 1.2212, - "step": 1332 - }, - { - "epoch": 1.0088930936613056, - "grad_norm": 2.4323315620422363, - "learning_rate": 1.5148074741998377e-05, - "loss": 1.1289, - "step": 1333 - }, - { - "epoch": 1.0096499526963103, - "grad_norm": 2.232952833175659, - "learning_rate": 1.5141190569555033e-05, - "loss": 1.1044, - "step": 1334 - }, - { - "epoch": 1.010406811731315, - "grad_norm": 2.1613996028900146, - "learning_rate": 1.513430308382324e-05, - "loss": 1.1678, - "step": 1335 - }, - { - "epoch": 1.0111636707663199, - "grad_norm": 2.369002342224121, - "learning_rate": 1.5127412289242562e-05, - "loss": 1.2219, - "step": 1336 - }, - { - "epoch": 1.0119205298013245, - "grad_norm": 2.2522876262664795, - "learning_rate": 1.51205181902547e-05, - "loss": 1.1728, - "step": 1337 - }, - { - "epoch": 1.0126773888363292, - "grad_norm": 2.172529935836792, - "learning_rate": 1.5113620791303489e-05, - "loss": 1.1599, - "step": 1338 - }, - { - "epoch": 1.013434247871334, - "grad_norm": 2.265456199645996, - "learning_rate": 1.5106720096834885e-05, - "loss": 1.1496, - "step": 1339 - }, - { - "epoch": 1.0141911069063387, - "grad_norm": 2.3640429973602295, - "learning_rate": 1.5099816111296968e-05, - "loss": 1.2001, - "step": 1340 - }, - { - "epoch": 1.0149479659413434, - "grad_norm": 2.227107286453247, - "learning_rate": 1.5092908839139948e-05, - "loss": 1.1911, - "step": 1341 - }, - { - "epoch": 1.015704824976348, - "grad_norm": 2.3177998065948486, - "learning_rate": 1.5085998284816144e-05, - "loss": 1.1233, - "step": 1342 - }, - { - "epoch": 1.016461684011353, - "grad_norm": 2.167343854904175, - "learning_rate": 1.507908445277999e-05, - "loss": 1.2057, - "step": 1343 - }, - { - "epoch": 1.0172185430463576, - "grad_norm": 2.2151575088500977, - "learning_rate": 1.5072167347488042e-05, - "loss": 1.1828, - "step": 1344 - }, - { - "epoch": 1.0179754020813623, - "grad_norm": 2.031900405883789, - "learning_rate": 1.5065246973398959e-05, - "loss": 1.1408, - "step": 1345 - }, - { - "epoch": 1.0187322611163672, - "grad_norm": 2.3186428546905518, - "learning_rate": 1.5058323334973508e-05, - "loss": 1.1698, - "step": 1346 - }, - { - "epoch": 1.0194891201513718, - "grad_norm": 2.2243926525115967, - "learning_rate": 1.5051396436674562e-05, - "loss": 1.1983, - "step": 1347 - }, - { - "epoch": 1.0202459791863765, - "grad_norm": 2.0334129333496094, - "learning_rate": 1.5044466282967092e-05, - "loss": 1.1257, - "step": 1348 - }, - { - "epoch": 1.0210028382213812, - "grad_norm": 2.194042921066284, - "learning_rate": 1.503753287831817e-05, - "loss": 1.224, - "step": 1349 - }, - { - "epoch": 1.021759697256386, - "grad_norm": 2.2667534351348877, - "learning_rate": 1.5030596227196963e-05, - "loss": 1.2042, - "step": 1350 - }, - { - "epoch": 1.0225165562913907, - "grad_norm": 2.2039318084716797, - "learning_rate": 1.5023656334074732e-05, - "loss": 1.1965, - "step": 1351 - }, - { - "epoch": 1.0232734153263954, - "grad_norm": 2.3508946895599365, - "learning_rate": 
1.5016713203424824e-05, - "loss": 1.1718, - "step": 1352 - }, - { - "epoch": 1.0240302743614003, - "grad_norm": 2.135310649871826, - "learning_rate": 1.5009766839722679e-05, - "loss": 1.1503, - "step": 1353 - }, - { - "epoch": 1.024787133396405, - "grad_norm": 2.2958900928497314, - "learning_rate": 1.5002817247445813e-05, - "loss": 1.2141, - "step": 1354 - }, - { - "epoch": 1.0255439924314096, - "grad_norm": 2.3174233436584473, - "learning_rate": 1.4995864431073828e-05, - "loss": 1.158, - "step": 1355 - }, - { - "epoch": 1.0263008514664145, - "grad_norm": 2.1523966789245605, - "learning_rate": 1.4988908395088405e-05, - "loss": 1.1757, - "step": 1356 - }, - { - "epoch": 1.0270577105014191, - "grad_norm": 2.2384963035583496, - "learning_rate": 1.4981949143973297e-05, - "loss": 1.1391, - "step": 1357 - }, - { - "epoch": 1.0278145695364238, - "grad_norm": 2.1168923377990723, - "learning_rate": 1.4974986682214332e-05, - "loss": 1.1306, - "step": 1358 - }, - { - "epoch": 1.0285714285714285, - "grad_norm": 2.392561435699463, - "learning_rate": 1.4968021014299409e-05, - "loss": 1.2224, - "step": 1359 - }, - { - "epoch": 1.0293282876064334, - "grad_norm": 2.2257487773895264, - "learning_rate": 1.4961052144718486e-05, - "loss": 1.1284, - "step": 1360 - }, - { - "epoch": 1.030085146641438, - "grad_norm": 2.0494847297668457, - "learning_rate": 1.4954080077963596e-05, - "loss": 1.1204, - "step": 1361 - }, - { - "epoch": 1.0308420056764427, - "grad_norm": 2.2043280601501465, - "learning_rate": 1.4947104818528822e-05, - "loss": 1.2135, - "step": 1362 - }, - { - "epoch": 1.0315988647114476, - "grad_norm": 2.1744041442871094, - "learning_rate": 1.494012637091031e-05, - "loss": 1.1533, - "step": 1363 - }, - { - "epoch": 1.0323557237464522, - "grad_norm": 2.1696369647979736, - "learning_rate": 1.4933144739606262e-05, - "loss": 1.171, - "step": 1364 - }, - { - "epoch": 1.033112582781457, - "grad_norm": 2.259871006011963, - "learning_rate": 1.4926159929116934e-05, - "loss": 1.1689, - "step": 1365 - }, - { - "epoch": 1.0338694418164618, - "grad_norm": 2.3883163928985596, - "learning_rate": 1.4919171943944628e-05, - "loss": 1.1808, - "step": 1366 - }, - { - "epoch": 1.0346263008514665, - "grad_norm": 2.4137635231018066, - "learning_rate": 1.4912180788593686e-05, - "loss": 1.2425, - "step": 1367 - }, - { - "epoch": 1.0353831598864711, - "grad_norm": 2.2681548595428467, - "learning_rate": 1.4905186467570509e-05, - "loss": 1.1808, - "step": 1368 - }, - { - "epoch": 1.0361400189214758, - "grad_norm": 2.2818410396575928, - "learning_rate": 1.4898188985383522e-05, - "loss": 1.1897, - "step": 1369 - }, - { - "epoch": 1.0368968779564807, - "grad_norm": 2.2151215076446533, - "learning_rate": 1.4891188346543201e-05, - "loss": 1.136, - "step": 1370 - }, - { - "epoch": 1.0376537369914853, - "grad_norm": 2.248666763305664, - "learning_rate": 1.488418455556205e-05, - "loss": 1.1758, - "step": 1371 - }, - { - "epoch": 1.03841059602649, - "grad_norm": 2.2258033752441406, - "learning_rate": 1.4877177616954602e-05, - "loss": 1.1628, - "step": 1372 - }, - { - "epoch": 1.0391674550614949, - "grad_norm": 2.117659091949463, - "learning_rate": 1.4870167535237428e-05, - "loss": 1.2191, - "step": 1373 - }, - { - "epoch": 1.0399243140964995, - "grad_norm": 2.367983102798462, - "learning_rate": 1.4863154314929114e-05, - "loss": 1.1415, - "step": 1374 - }, - { - "epoch": 1.0406811731315042, - "grad_norm": 2.134035587310791, - "learning_rate": 1.4856137960550278e-05, - "loss": 1.2032, - "step": 1375 - }, - { - "epoch": 1.0414380321665089, - 
"grad_norm": 2.392430543899536, - "learning_rate": 1.4849118476623556e-05, - "loss": 1.197, - "step": 1376 - }, - { - "epoch": 1.0421948912015138, - "grad_norm": 2.237036943435669, - "learning_rate": 1.4842095867673603e-05, - "loss": 1.168, - "step": 1377 - }, - { - "epoch": 1.0429517502365184, - "grad_norm": 2.338472604751587, - "learning_rate": 1.4835070138227077e-05, - "loss": 1.1629, - "step": 1378 - }, - { - "epoch": 1.043708609271523, - "grad_norm": 2.1505656242370605, - "learning_rate": 1.4828041292812662e-05, - "loss": 1.1559, - "step": 1379 - }, - { - "epoch": 1.044465468306528, - "grad_norm": 2.155229330062866, - "learning_rate": 1.4821009335961045e-05, - "loss": 1.1477, - "step": 1380 - }, - { - "epoch": 1.0452223273415326, - "grad_norm": 2.1831212043762207, - "learning_rate": 1.4813974272204918e-05, - "loss": 1.1486, - "step": 1381 - }, - { - "epoch": 1.0459791863765373, - "grad_norm": 2.2904438972473145, - "learning_rate": 1.4806936106078971e-05, - "loss": 1.1605, - "step": 1382 - }, - { - "epoch": 1.0467360454115422, - "grad_norm": 2.416222333908081, - "learning_rate": 1.4799894842119906e-05, - "loss": 1.1161, - "step": 1383 - }, - { - "epoch": 1.0474929044465469, - "grad_norm": 2.2631683349609375, - "learning_rate": 1.4792850484866408e-05, - "loss": 1.173, - "step": 1384 - }, - { - "epoch": 1.0482497634815515, - "grad_norm": 2.2983131408691406, - "learning_rate": 1.4785803038859166e-05, - "loss": 1.1584, - "step": 1385 - }, - { - "epoch": 1.0490066225165562, - "grad_norm": 2.1680402755737305, - "learning_rate": 1.4778752508640852e-05, - "loss": 1.1689, - "step": 1386 - }, - { - "epoch": 1.049763481551561, - "grad_norm": 2.161684036254883, - "learning_rate": 1.4771698898756137e-05, - "loss": 1.1772, - "step": 1387 - }, - { - "epoch": 1.0505203405865657, - "grad_norm": 2.048295021057129, - "learning_rate": 1.4764642213751664e-05, - "loss": 1.1598, - "step": 1388 - }, - { - "epoch": 1.0512771996215704, - "grad_norm": 2.0943684577941895, - "learning_rate": 1.4757582458176067e-05, - "loss": 1.1389, - "step": 1389 - }, - { - "epoch": 1.0520340586565753, - "grad_norm": 2.5327534675598145, - "learning_rate": 1.475051963657996e-05, - "loss": 1.1496, - "step": 1390 - }, - { - "epoch": 1.05279091769158, - "grad_norm": 2.1597166061401367, - "learning_rate": 1.4743453753515924e-05, - "loss": 1.1409, - "step": 1391 - }, - { - "epoch": 1.0535477767265846, - "grad_norm": 2.877094268798828, - "learning_rate": 1.4736384813538527e-05, - "loss": 1.1359, - "step": 1392 - }, - { - "epoch": 1.0543046357615895, - "grad_norm": 2.4092910289764404, - "learning_rate": 1.472931282120429e-05, - "loss": 1.1673, - "step": 1393 - }, - { - "epoch": 1.0550614947965942, - "grad_norm": 2.26458740234375, - "learning_rate": 1.4722237781071717e-05, - "loss": 1.167, - "step": 1394 - }, - { - "epoch": 1.0558183538315988, - "grad_norm": 2.2418289184570312, - "learning_rate": 1.4715159697701276e-05, - "loss": 1.1674, - "step": 1395 - }, - { - "epoch": 1.0565752128666035, - "grad_norm": 2.948460102081299, - "learning_rate": 1.470807857565538e-05, - "loss": 1.1459, - "step": 1396 - }, - { - "epoch": 1.0573320719016084, - "grad_norm": 2.1875085830688477, - "learning_rate": 1.4700994419498423e-05, - "loss": 1.1781, - "step": 1397 - }, - { - "epoch": 1.058088930936613, - "grad_norm": 2.3909361362457275, - "learning_rate": 1.4693907233796737e-05, - "loss": 1.1346, - "step": 1398 - }, - { - "epoch": 1.0588457899716177, - "grad_norm": 2.12752628326416, - "learning_rate": 1.4686817023118619e-05, - "loss": 1.1127, - "step": 
1399 - }, - { - "epoch": 1.0596026490066226, - "grad_norm": 2.3758580684661865, - "learning_rate": 1.4679723792034304e-05, - "loss": 1.1667, - "step": 1400 - }, - { - "epoch": 1.0603595080416273, - "grad_norm": 2.23144793510437, - "learning_rate": 1.4672627545115991e-05, - "loss": 1.1693, - "step": 1401 - }, - { - "epoch": 1.061116367076632, - "grad_norm": 2.2588181495666504, - "learning_rate": 1.46655282869378e-05, - "loss": 1.1555, - "step": 1402 - }, - { - "epoch": 1.0618732261116368, - "grad_norm": 3.266263961791992, - "learning_rate": 1.4658426022075816e-05, - "loss": 1.1475, - "step": 1403 - }, - { - "epoch": 1.0626300851466415, - "grad_norm": 2.344022750854492, - "learning_rate": 1.4651320755108042e-05, - "loss": 1.1151, - "step": 1404 - }, - { - "epoch": 1.0633869441816461, - "grad_norm": 2.701164722442627, - "learning_rate": 1.464421249061443e-05, - "loss": 1.1547, - "step": 1405 - }, - { - "epoch": 1.0641438032166508, - "grad_norm": 2.4534714221954346, - "learning_rate": 1.4637101233176856e-05, - "loss": 1.17, - "step": 1406 - }, - { - "epoch": 1.0649006622516557, - "grad_norm": 2.413388252258301, - "learning_rate": 1.462998698737913e-05, - "loss": 1.1852, - "step": 1407 - }, - { - "epoch": 1.0656575212866604, - "grad_norm": 2.2986247539520264, - "learning_rate": 1.4622869757806983e-05, - "loss": 1.1544, - "step": 1408 - }, - { - "epoch": 1.066414380321665, - "grad_norm": 2.308239221572876, - "learning_rate": 1.4615749549048076e-05, - "loss": 1.1572, - "step": 1409 - }, - { - "epoch": 1.06717123935667, - "grad_norm": 2.270495653152466, - "learning_rate": 1.4608626365691986e-05, - "loss": 1.1625, - "step": 1410 - }, - { - "epoch": 1.0679280983916746, - "grad_norm": 2.0776920318603516, - "learning_rate": 1.4601500212330213e-05, - "loss": 1.1879, - "step": 1411 - }, - { - "epoch": 1.0686849574266792, - "grad_norm": 2.279533863067627, - "learning_rate": 1.4594371093556159e-05, - "loss": 1.1844, - "step": 1412 - }, - { - "epoch": 1.069441816461684, - "grad_norm": 2.263552188873291, - "learning_rate": 1.4587239013965149e-05, - "loss": 1.1192, - "step": 1413 - }, - { - "epoch": 1.0701986754966888, - "grad_norm": 2.1875579357147217, - "learning_rate": 1.4580103978154414e-05, - "loss": 1.1921, - "step": 1414 - }, - { - "epoch": 1.0709555345316935, - "grad_norm": 2.553298234939575, - "learning_rate": 1.4572965990723083e-05, - "loss": 1.1307, - "step": 1415 - }, - { - "epoch": 1.0717123935666981, - "grad_norm": 2.0610175132751465, - "learning_rate": 1.4565825056272199e-05, - "loss": 1.2057, - "step": 1416 - }, - { - "epoch": 1.072469252601703, - "grad_norm": 3.3085532188415527, - "learning_rate": 1.4558681179404704e-05, - "loss": 1.1566, - "step": 1417 - }, - { - "epoch": 1.0732261116367077, - "grad_norm": 2.1448001861572266, - "learning_rate": 1.4551534364725422e-05, - "loss": 1.1958, - "step": 1418 - }, - { - "epoch": 1.0739829706717123, - "grad_norm": 2.5602312088012695, - "learning_rate": 1.4544384616841084e-05, - "loss": 1.1513, - "step": 1419 - }, - { - "epoch": 1.0747398297067172, - "grad_norm": 2.34245228767395, - "learning_rate": 1.4537231940360315e-05, - "loss": 1.1331, - "step": 1420 - }, - { - "epoch": 1.0754966887417219, - "grad_norm": 2.5702669620513916, - "learning_rate": 1.4530076339893615e-05, - "loss": 1.1369, - "step": 1421 - }, - { - "epoch": 1.0762535477767265, - "grad_norm": 2.3624837398529053, - "learning_rate": 1.4522917820053375e-05, - "loss": 1.1618, - "step": 1422 - }, - { - "epoch": 1.0770104068117312, - "grad_norm": 2.3341963291168213, - "learning_rate": 
1.4515756385453868e-05, - "loss": 1.1688, - "step": 1423 - }, - { - "epoch": 1.077767265846736, - "grad_norm": 2.32336688041687, - "learning_rate": 1.4508592040711246e-05, - "loss": 1.136, - "step": 1424 - }, - { - "epoch": 1.0785241248817408, - "grad_norm": 2.4956133365631104, - "learning_rate": 1.4501424790443544e-05, - "loss": 1.1893, - "step": 1425 - }, - { - "epoch": 1.0792809839167454, - "grad_norm": 2.5766842365264893, - "learning_rate": 1.4494254639270646e-05, - "loss": 1.104, - "step": 1426 - }, - { - "epoch": 1.0800378429517503, - "grad_norm": 2.3494839668273926, - "learning_rate": 1.4487081591814336e-05, - "loss": 1.1509, - "step": 1427 - }, - { - "epoch": 1.080794701986755, - "grad_norm": 2.25639009475708, - "learning_rate": 1.4479905652698248e-05, - "loss": 1.1518, - "step": 1428 - }, - { - "epoch": 1.0815515610217596, - "grad_norm": 2.454833984375, - "learning_rate": 1.4472726826547876e-05, - "loss": 1.2164, - "step": 1429 - }, - { - "epoch": 1.0823084200567645, - "grad_norm": 2.320312976837158, - "learning_rate": 1.4465545117990587e-05, - "loss": 1.1562, - "step": 1430 - }, - { - "epoch": 1.0830652790917692, - "grad_norm": 2.136070966720581, - "learning_rate": 1.4458360531655606e-05, - "loss": 1.1621, - "step": 1431 - }, - { - "epoch": 1.0838221381267739, - "grad_norm": 2.3895716667175293, - "learning_rate": 1.4451173072173996e-05, - "loss": 1.1442, - "step": 1432 - }, - { - "epoch": 1.0845789971617785, - "grad_norm": 2.185600757598877, - "learning_rate": 1.4443982744178694e-05, - "loss": 1.1352, - "step": 1433 - }, - { - "epoch": 1.0853358561967834, - "grad_norm": 2.2408831119537354, - "learning_rate": 1.4436789552304471e-05, - "loss": 1.1771, - "step": 1434 - }, - { - "epoch": 1.086092715231788, - "grad_norm": 2.2491331100463867, - "learning_rate": 1.4429593501187952e-05, - "loss": 1.1509, - "step": 1435 - }, - { - "epoch": 1.0868495742667927, - "grad_norm": 2.4954214096069336, - "learning_rate": 1.4422394595467597e-05, - "loss": 1.1333, - "step": 1436 - }, - { - "epoch": 1.0876064333017976, - "grad_norm": 2.427107572555542, - "learning_rate": 1.4415192839783716e-05, - "loss": 1.1806, - "step": 1437 - }, - { - "epoch": 1.0883632923368023, - "grad_norm": 5.063598155975342, - "learning_rate": 1.4407988238778448e-05, - "loss": 1.1364, - "step": 1438 - }, - { - "epoch": 1.089120151371807, - "grad_norm": 2.261101245880127, - "learning_rate": 1.4400780797095769e-05, - "loss": 1.171, - "step": 1439 - }, - { - "epoch": 1.0898770104068118, - "grad_norm": 2.3400819301605225, - "learning_rate": 1.4393570519381484e-05, - "loss": 1.1354, - "step": 1440 - }, - { - "epoch": 1.0906338694418165, - "grad_norm": 2.225931167602539, - "learning_rate": 1.438635741028323e-05, - "loss": 1.1674, - "step": 1441 - }, - { - "epoch": 1.0913907284768212, - "grad_norm": 2.3904130458831787, - "learning_rate": 1.437914147445047e-05, - "loss": 1.1513, - "step": 1442 - }, - { - "epoch": 1.0921475875118258, - "grad_norm": 2.4583778381347656, - "learning_rate": 1.4371922716534483e-05, - "loss": 1.1708, - "step": 1443 - }, - { - "epoch": 1.0929044465468307, - "grad_norm": 2.270364761352539, - "learning_rate": 1.436470114118837e-05, - "loss": 1.1708, - "step": 1444 - }, - { - "epoch": 1.0936613055818354, - "grad_norm": 2.190642833709717, - "learning_rate": 1.4357476753067053e-05, - "loss": 1.123, - "step": 1445 - }, - { - "epoch": 1.09441816461684, - "grad_norm": 2.3521080017089844, - "learning_rate": 1.4350249556827256e-05, - "loss": 1.1793, - "step": 1446 - }, - { - "epoch": 1.095175023651845, - "grad_norm": 
2.4334235191345215, - "learning_rate": 1.4343019557127522e-05, - "loss": 1.175, - "step": 1447 - }, - { - "epoch": 1.0959318826868496, - "grad_norm": 2.2919211387634277, - "learning_rate": 1.4335786758628199e-05, - "loss": 1.1601, - "step": 1448 - }, - { - "epoch": 1.0966887417218543, - "grad_norm": 2.603358745574951, - "learning_rate": 1.4328551165991435e-05, - "loss": 1.1966, - "step": 1449 - }, - { - "epoch": 1.097445600756859, - "grad_norm": 2.47110915184021, - "learning_rate": 1.4321312783881192e-05, - "loss": 1.1623, - "step": 1450 - }, - { - "epoch": 1.0982024597918638, - "grad_norm": 2.54114031791687, - "learning_rate": 1.431407161696321e-05, - "loss": 1.1598, - "step": 1451 - }, - { - "epoch": 1.0989593188268685, - "grad_norm": 2.1958112716674805, - "learning_rate": 1.4306827669905041e-05, - "loss": 1.1317, - "step": 1452 - }, - { - "epoch": 1.0997161778618731, - "grad_norm": 2.2989349365234375, - "learning_rate": 1.4299580947376022e-05, - "loss": 1.1036, - "step": 1453 - }, - { - "epoch": 1.100473036896878, - "grad_norm": 2.331904411315918, - "learning_rate": 1.4292331454047278e-05, - "loss": 1.1331, - "step": 1454 - }, - { - "epoch": 1.1012298959318827, - "grad_norm": 2.376122236251831, - "learning_rate": 1.4285079194591722e-05, - "loss": 1.1649, - "step": 1455 - }, - { - "epoch": 1.1019867549668874, - "grad_norm": 2.453084945678711, - "learning_rate": 1.4277824173684056e-05, - "loss": 1.1636, - "step": 1456 - }, - { - "epoch": 1.1027436140018922, - "grad_norm": 2.4421586990356445, - "learning_rate": 1.4270566396000744e-05, - "loss": 1.1323, - "step": 1457 - }, - { - "epoch": 1.103500473036897, - "grad_norm": 2.308035135269165, - "learning_rate": 1.426330586622005e-05, - "loss": 1.13, - "step": 1458 - }, - { - "epoch": 1.1042573320719016, - "grad_norm": 2.403162956237793, - "learning_rate": 1.4256042589021994e-05, - "loss": 1.181, - "step": 1459 - }, - { - "epoch": 1.1050141911069062, - "grad_norm": 2.4109246730804443, - "learning_rate": 1.4248776569088377e-05, - "loss": 1.1597, - "step": 1460 - }, - { - "epoch": 1.1057710501419111, - "grad_norm": 2.412398099899292, - "learning_rate": 1.4241507811102762e-05, - "loss": 1.118, - "step": 1461 - }, - { - "epoch": 1.1065279091769158, - "grad_norm": 2.8465075492858887, - "learning_rate": 1.4234236319750482e-05, - "loss": 1.1618, - "step": 1462 - }, - { - "epoch": 1.1072847682119205, - "grad_norm": 2.306621789932251, - "learning_rate": 1.4226962099718628e-05, - "loss": 1.2062, - "step": 1463 - }, - { - "epoch": 1.1080416272469253, - "grad_norm": 2.6074671745300293, - "learning_rate": 1.4219685155696053e-05, - "loss": 1.1676, - "step": 1464 - }, - { - "epoch": 1.10879848628193, - "grad_norm": 2.509995460510254, - "learning_rate": 1.421240549237336e-05, - "loss": 1.1771, - "step": 1465 - }, - { - "epoch": 1.1095553453169347, - "grad_norm": 2.535238742828369, - "learning_rate": 1.4205123114442916e-05, - "loss": 1.1682, - "step": 1466 - }, - { - "epoch": 1.1103122043519393, - "grad_norm": 2.4258975982666016, - "learning_rate": 1.4197838026598826e-05, - "loss": 1.0947, - "step": 1467 - }, - { - "epoch": 1.1110690633869442, - "grad_norm": 2.5997817516326904, - "learning_rate": 1.4190550233536946e-05, - "loss": 1.1471, - "step": 1468 - }, - { - "epoch": 1.1118259224219489, - "grad_norm": 2.358372449874878, - "learning_rate": 1.4183259739954877e-05, - "loss": 1.1564, - "step": 1469 - }, - { - "epoch": 1.1125827814569536, - "grad_norm": 2.323791027069092, - "learning_rate": 1.4175966550551963e-05, - "loss": 1.1936, - "step": 1470 - }, - { - 
"epoch": 1.1133396404919584, - "grad_norm": 2.334627151489258, - "learning_rate": 1.4168670670029277e-05, - "loss": 1.1514, - "step": 1471 - }, - { - "epoch": 1.114096499526963, - "grad_norm": 2.2344837188720703, - "learning_rate": 1.4161372103089637e-05, - "loss": 1.1378, - "step": 1472 - }, - { - "epoch": 1.1148533585619678, - "grad_norm": 2.200742483139038, - "learning_rate": 1.4154070854437587e-05, - "loss": 1.1783, - "step": 1473 - }, - { - "epoch": 1.1156102175969727, - "grad_norm": 2.2466723918914795, - "learning_rate": 1.4146766928779396e-05, - "loss": 1.1419, - "step": 1474 - }, - { - "epoch": 1.1163670766319773, - "grad_norm": 2.4173378944396973, - "learning_rate": 1.4139460330823071e-05, - "loss": 1.0991, - "step": 1475 - }, - { - "epoch": 1.117123935666982, - "grad_norm": 2.3149657249450684, - "learning_rate": 1.413215106527833e-05, - "loss": 1.1419, - "step": 1476 - }, - { - "epoch": 1.1178807947019869, - "grad_norm": 2.2564306259155273, - "learning_rate": 1.4124839136856612e-05, - "loss": 1.1693, - "step": 1477 - }, - { - "epoch": 1.1186376537369915, - "grad_norm": 2.131028652191162, - "learning_rate": 1.4117524550271077e-05, - "loss": 1.158, - "step": 1478 - }, - { - "epoch": 1.1193945127719962, - "grad_norm": 2.4710068702697754, - "learning_rate": 1.4110207310236595e-05, - "loss": 1.1934, - "step": 1479 - }, - { - "epoch": 1.1201513718070009, - "grad_norm": 2.138939380645752, - "learning_rate": 1.4102887421469747e-05, - "loss": 1.1196, - "step": 1480 - }, - { - "epoch": 1.1209082308420057, - "grad_norm": 2.542495012283325, - "learning_rate": 1.4095564888688822e-05, - "loss": 1.1693, - "step": 1481 - }, - { - "epoch": 1.1216650898770104, - "grad_norm": 2.4574832916259766, - "learning_rate": 1.4088239716613816e-05, - "loss": 1.1248, - "step": 1482 - }, - { - "epoch": 1.122421948912015, - "grad_norm": 2.133028268814087, - "learning_rate": 1.4080911909966419e-05, - "loss": 1.2361, - "step": 1483 - }, - { - "epoch": 1.12317880794702, - "grad_norm": 2.624393939971924, - "learning_rate": 1.4073581473470023e-05, - "loss": 1.1053, - "step": 1484 - }, - { - "epoch": 1.1239356669820246, - "grad_norm": 2.0480175018310547, - "learning_rate": 1.4066248411849717e-05, - "loss": 1.1364, - "step": 1485 - }, - { - "epoch": 1.1246925260170293, - "grad_norm": 2.2111339569091797, - "learning_rate": 1.4058912729832286e-05, - "loss": 1.1869, - "step": 1486 - }, - { - "epoch": 1.125449385052034, - "grad_norm": 2.4910013675689697, - "learning_rate": 1.4051574432146191e-05, - "loss": 1.16, - "step": 1487 - }, - { - "epoch": 1.1262062440870388, - "grad_norm": 2.419105052947998, - "learning_rate": 1.4044233523521587e-05, - "loss": 1.1637, - "step": 1488 - }, - { - "epoch": 1.1269631031220435, - "grad_norm": 2.4131598472595215, - "learning_rate": 1.4036890008690316e-05, - "loss": 1.1814, - "step": 1489 - }, - { - "epoch": 1.1277199621570482, - "grad_norm": 2.395854949951172, - "learning_rate": 1.4029543892385898e-05, - "loss": 1.1535, - "step": 1490 - }, - { - "epoch": 1.128476821192053, - "grad_norm": 2.0963070392608643, - "learning_rate": 1.4022195179343518e-05, - "loss": 1.1366, - "step": 1491 - }, - { - "epoch": 1.1292336802270577, - "grad_norm": 2.267829418182373, - "learning_rate": 1.4014843874300052e-05, - "loss": 1.1393, - "step": 1492 - }, - { - "epoch": 1.1299905392620624, - "grad_norm": 2.1519582271575928, - "learning_rate": 1.4007489981994038e-05, - "loss": 1.1728, - "step": 1493 - }, - { - "epoch": 1.1307473982970673, - "grad_norm": 2.194342613220215, - "learning_rate": 
1.4000133507165684e-05, - "loss": 1.1586, - "step": 1494 - }, - { - "epoch": 1.131504257332072, - "grad_norm": 2.3476803302764893, - "learning_rate": 1.3992774454556855e-05, - "loss": 1.2297, - "step": 1495 - }, - { - "epoch": 1.1322611163670766, - "grad_norm": 2.1007235050201416, - "learning_rate": 1.3985412828911088e-05, - "loss": 1.1605, - "step": 1496 - }, - { - "epoch": 1.1330179754020813, - "grad_norm": 2.512786388397217, - "learning_rate": 1.397804863497358e-05, - "loss": 1.1765, - "step": 1497 - }, - { - "epoch": 1.1337748344370862, - "grad_norm": 2.1948659420013428, - "learning_rate": 1.397068187749117e-05, - "loss": 1.1912, - "step": 1498 - }, - { - "epoch": 1.1345316934720908, - "grad_norm": 2.348325729370117, - "learning_rate": 1.3963312561212359e-05, - "loss": 1.152, - "step": 1499 - }, - { - "epoch": 1.1352885525070955, - "grad_norm": 2.088045597076416, - "learning_rate": 1.3955940690887301e-05, - "loss": 1.0803, - "step": 1500 - }, - { - "epoch": 1.1360454115421004, - "grad_norm": 2.110816240310669, - "learning_rate": 1.3948566271267784e-05, - "loss": 1.1599, - "step": 1501 - }, - { - "epoch": 1.136802270577105, - "grad_norm": 2.306739330291748, - "learning_rate": 1.3941189307107255e-05, - "loss": 1.2, - "step": 1502 - }, - { - "epoch": 1.1375591296121097, - "grad_norm": 2.494978666305542, - "learning_rate": 1.3933809803160784e-05, - "loss": 1.1418, - "step": 1503 - }, - { - "epoch": 1.1383159886471144, - "grad_norm": 2.4510955810546875, - "learning_rate": 1.3926427764185093e-05, - "loss": 1.1455, - "step": 1504 - }, - { - "epoch": 1.1390728476821192, - "grad_norm": 2.415323495864868, - "learning_rate": 1.3919043194938528e-05, - "loss": 1.1361, - "step": 1505 - }, - { - "epoch": 1.139829706717124, - "grad_norm": 2.263831615447998, - "learning_rate": 1.391165610018107e-05, - "loss": 1.1087, - "step": 1506 - }, - { - "epoch": 1.1405865657521286, - "grad_norm": 2.5898752212524414, - "learning_rate": 1.3904266484674331e-05, - "loss": 1.1339, - "step": 1507 - }, - { - "epoch": 1.1413434247871335, - "grad_norm": 2.153635263442993, - "learning_rate": 1.3896874353181542e-05, - "loss": 1.1024, - "step": 1508 - }, - { - "epoch": 1.1421002838221381, - "grad_norm": 2.095327138900757, - "learning_rate": 1.3889479710467557e-05, - "loss": 1.2094, - "step": 1509 - }, - { - "epoch": 1.1428571428571428, - "grad_norm": 2.1070072650909424, - "learning_rate": 1.388208256129885e-05, - "loss": 1.143, - "step": 1510 - }, - { - "epoch": 1.1436140018921477, - "grad_norm": 2.2180447578430176, - "learning_rate": 1.3874682910443516e-05, - "loss": 1.1682, - "step": 1511 - }, - { - "epoch": 1.1443708609271523, - "grad_norm": 2.3871870040893555, - "learning_rate": 1.3867280762671246e-05, - "loss": 1.1671, - "step": 1512 - }, - { - "epoch": 1.145127719962157, - "grad_norm": 2.165802001953125, - "learning_rate": 1.3859876122753363e-05, - "loss": 1.1138, - "step": 1513 - }, - { - "epoch": 1.145884578997162, - "grad_norm": 2.162033796310425, - "learning_rate": 1.3852468995462785e-05, - "loss": 1.1719, - "step": 1514 - }, - { - "epoch": 1.1466414380321666, - "grad_norm": 2.163429021835327, - "learning_rate": 1.3845059385574023e-05, - "loss": 1.1483, - "step": 1515 - }, - { - "epoch": 1.1473982970671712, - "grad_norm": 2.177055597305298, - "learning_rate": 1.3837647297863203e-05, - "loss": 1.1372, - "step": 1516 - }, - { - "epoch": 1.1481551561021759, - "grad_norm": 2.3266491889953613, - "learning_rate": 1.383023273710805e-05, - "loss": 1.1825, - "step": 1517 - }, - { - "epoch": 1.1489120151371808, - 
"grad_norm": 2.4812657833099365, - "learning_rate": 1.3822815708087865e-05, - "loss": 1.1697, - "step": 1518 - }, - { - "epoch": 1.1496688741721854, - "grad_norm": 2.1462526321411133, - "learning_rate": 1.3815396215583564e-05, - "loss": 1.1203, - "step": 1519 - }, - { - "epoch": 1.15042573320719, - "grad_norm": 2.160487174987793, - "learning_rate": 1.3807974264377629e-05, - "loss": 1.1322, - "step": 1520 - }, - { - "epoch": 1.1511825922421948, - "grad_norm": 2.234320640563965, - "learning_rate": 1.3800549859254144e-05, - "loss": 1.1393, - "step": 1521 - }, - { - "epoch": 1.1519394512771997, - "grad_norm": 2.4396426677703857, - "learning_rate": 1.3793123004998765e-05, - "loss": 1.1671, - "step": 1522 - }, - { - "epoch": 1.1526963103122043, - "grad_norm": 2.3154118061065674, - "learning_rate": 1.3785693706398724e-05, - "loss": 1.1867, - "step": 1523 - }, - { - "epoch": 1.153453169347209, - "grad_norm": 2.2119319438934326, - "learning_rate": 1.377826196824284e-05, - "loss": 1.1582, - "step": 1524 - }, - { - "epoch": 1.1542100283822139, - "grad_norm": 2.2084405422210693, - "learning_rate": 1.3770827795321495e-05, - "loss": 1.1613, - "step": 1525 - }, - { - "epoch": 1.1549668874172185, - "grad_norm": 2.341912031173706, - "learning_rate": 1.3763391192426644e-05, - "loss": 1.1519, - "step": 1526 - }, - { - "epoch": 1.1557237464522232, - "grad_norm": 2.2736034393310547, - "learning_rate": 1.3755952164351814e-05, - "loss": 1.1465, - "step": 1527 - }, - { - "epoch": 1.156480605487228, - "grad_norm": 2.2359468936920166, - "learning_rate": 1.3748510715892075e-05, - "loss": 1.193, - "step": 1528 - }, - { - "epoch": 1.1572374645222328, - "grad_norm": 1.9430551528930664, - "learning_rate": 1.3741066851844082e-05, - "loss": 1.139, - "step": 1529 - }, - { - "epoch": 1.1579943235572374, - "grad_norm": 2.0962564945220947, - "learning_rate": 1.3733620577006035e-05, - "loss": 1.1442, - "step": 1530 - }, - { - "epoch": 1.1587511825922423, - "grad_norm": 2.0686581134796143, - "learning_rate": 1.3726171896177687e-05, - "loss": 1.1778, - "step": 1531 - }, - { - "epoch": 1.159508041627247, - "grad_norm": 2.120643138885498, - "learning_rate": 1.3718720814160342e-05, - "loss": 1.1789, - "step": 1532 - }, - { - "epoch": 1.1602649006622516, - "grad_norm": 2.4168310165405273, - "learning_rate": 1.3711267335756862e-05, - "loss": 1.1816, - "step": 1533 - }, - { - "epoch": 1.1610217596972563, - "grad_norm": 2.183661460876465, - "learning_rate": 1.3703811465771636e-05, - "loss": 1.1861, - "step": 1534 - }, - { - "epoch": 1.1617786187322612, - "grad_norm": 2.196077346801758, - "learning_rate": 1.3696353209010609e-05, - "loss": 1.1845, - "step": 1535 - }, - { - "epoch": 1.1625354777672658, - "grad_norm": 2.280958890914917, - "learning_rate": 1.3688892570281261e-05, - "loss": 1.1371, - "step": 1536 - }, - { - "epoch": 1.1632923368022705, - "grad_norm": 2.3048434257507324, - "learning_rate": 1.3681429554392602e-05, - "loss": 1.15, - "step": 1537 - }, - { - "epoch": 1.1640491958372754, - "grad_norm": 2.177098512649536, - "learning_rate": 1.367396416615518e-05, - "loss": 1.1537, - "step": 1538 - }, - { - "epoch": 1.16480605487228, - "grad_norm": 2.419185161590576, - "learning_rate": 1.3666496410381072e-05, - "loss": 1.1634, - "step": 1539 - }, - { - "epoch": 1.1655629139072847, - "grad_norm": 2.5214250087738037, - "learning_rate": 1.3659026291883874e-05, - "loss": 1.1669, - "step": 1540 - }, - { - "epoch": 1.1663197729422894, - "grad_norm": 2.3618457317352295, - "learning_rate": 1.365155381547872e-05, - "loss": 1.2169, - "step": 
1541 - }, - { - "epoch": 1.1670766319772943, - "grad_norm": 2.3737759590148926, - "learning_rate": 1.3644078985982243e-05, - "loss": 1.2004, - "step": 1542 - }, - { - "epoch": 1.167833491012299, - "grad_norm": 2.500761032104492, - "learning_rate": 1.3636601808212613e-05, - "loss": 1.1576, - "step": 1543 - }, - { - "epoch": 1.1685903500473036, - "grad_norm": 2.429725170135498, - "learning_rate": 1.36291222869895e-05, - "loss": 1.1412, - "step": 1544 - }, - { - "epoch": 1.1693472090823085, - "grad_norm": 2.4820287227630615, - "learning_rate": 1.3621640427134095e-05, - "loss": 1.1256, - "step": 1545 - }, - { - "epoch": 1.1701040681173132, - "grad_norm": 2.5075745582580566, - "learning_rate": 1.3614156233469081e-05, - "loss": 1.1426, - "step": 1546 - }, - { - "epoch": 1.1708609271523178, - "grad_norm": 2.5569803714752197, - "learning_rate": 1.3606669710818665e-05, - "loss": 1.1438, - "step": 1547 - }, - { - "epoch": 1.1716177861873227, - "grad_norm": 2.4151296615600586, - "learning_rate": 1.3599180864008538e-05, - "loss": 1.1325, - "step": 1548 - }, - { - "epoch": 1.1723746452223274, - "grad_norm": 2.1855361461639404, - "learning_rate": 1.3591689697865902e-05, - "loss": 1.1328, - "step": 1549 - }, - { - "epoch": 1.173131504257332, - "grad_norm": 2.130683422088623, - "learning_rate": 1.3584196217219443e-05, - "loss": 1.1411, - "step": 1550 - }, - { - "epoch": 1.173888363292337, - "grad_norm": 2.3407418727874756, - "learning_rate": 1.357670042689935e-05, - "loss": 1.1396, - "step": 1551 - }, - { - "epoch": 1.1746452223273416, - "grad_norm": 2.5837795734405518, - "learning_rate": 1.3569202331737292e-05, - "loss": 1.1858, - "step": 1552 - }, - { - "epoch": 1.1754020813623463, - "grad_norm": 2.2978811264038086, - "learning_rate": 1.3561701936566426e-05, - "loss": 1.1712, - "step": 1553 - }, - { - "epoch": 1.176158940397351, - "grad_norm": 2.5813682079315186, - "learning_rate": 1.355419924622139e-05, - "loss": 1.1282, - "step": 1554 - }, - { - "epoch": 1.1769157994323558, - "grad_norm": 2.0672824382781982, - "learning_rate": 1.3546694265538316e-05, - "loss": 1.1639, - "step": 1555 - }, - { - "epoch": 1.1776726584673605, - "grad_norm": 2.138291358947754, - "learning_rate": 1.3539186999354785e-05, - "loss": 1.1583, - "step": 1556 - }, - { - "epoch": 1.1784295175023651, - "grad_norm": 2.2805378437042236, - "learning_rate": 1.3531677452509873e-05, - "loss": 1.1315, - "step": 1557 - }, - { - "epoch": 1.1791863765373698, - "grad_norm": 2.4369373321533203, - "learning_rate": 1.3524165629844124e-05, - "loss": 1.1395, - "step": 1558 - }, - { - "epoch": 1.1799432355723747, - "grad_norm": 2.610330820083618, - "learning_rate": 1.3516651536199536e-05, - "loss": 1.1534, - "step": 1559 - }, - { - "epoch": 1.1807000946073793, - "grad_norm": 2.1532680988311768, - "learning_rate": 1.3509135176419583e-05, - "loss": 1.1266, - "step": 1560 - }, - { - "epoch": 1.181456953642384, - "grad_norm": 2.269569158554077, - "learning_rate": 1.3501616555349195e-05, - "loss": 1.1962, - "step": 1561 - }, - { - "epoch": 1.182213812677389, - "grad_norm": 2.3179900646209717, - "learning_rate": 1.3494095677834762e-05, - "loss": 1.1554, - "step": 1562 - }, - { - "epoch": 1.1829706717123936, - "grad_norm": 2.116596221923828, - "learning_rate": 1.3486572548724126e-05, - "loss": 1.124, - "step": 1563 - }, - { - "epoch": 1.1837275307473982, - "grad_norm": 2.4712612628936768, - "learning_rate": 1.347904717286658e-05, - "loss": 1.1336, - "step": 1564 - }, - { - "epoch": 1.1844843897824031, - "grad_norm": 2.1904261112213135, - "learning_rate": 
1.3471519555112866e-05, - "loss": 1.1613, - "step": 1565 - }, - { - "epoch": 1.1852412488174078, - "grad_norm": 2.235826253890991, - "learning_rate": 1.3463989700315179e-05, - "loss": 1.1404, - "step": 1566 - }, - { - "epoch": 1.1859981078524124, - "grad_norm": 2.2662901878356934, - "learning_rate": 1.3456457613327136e-05, - "loss": 1.1985, - "step": 1567 - }, - { - "epoch": 1.1867549668874173, - "grad_norm": 2.3666486740112305, - "learning_rate": 1.3448923299003815e-05, - "loss": 1.125, - "step": 1568 - }, - { - "epoch": 1.187511825922422, - "grad_norm": 2.1387600898742676, - "learning_rate": 1.344138676220172e-05, - "loss": 1.157, - "step": 1569 - }, - { - "epoch": 1.1882686849574267, - "grad_norm": 2.4021949768066406, - "learning_rate": 1.3433848007778783e-05, - "loss": 1.1628, - "step": 1570 - }, - { - "epoch": 1.1890255439924313, - "grad_norm": 2.100867986679077, - "learning_rate": 1.3426307040594372e-05, - "loss": 1.1712, - "step": 1571 - }, - { - "epoch": 1.1897824030274362, - "grad_norm": 2.6818182468414307, - "learning_rate": 1.3418763865509283e-05, - "loss": 1.1505, - "step": 1572 - }, - { - "epoch": 1.1905392620624409, - "grad_norm": 2.2335827350616455, - "learning_rate": 1.3411218487385725e-05, - "loss": 1.1367, - "step": 1573 - }, - { - "epoch": 1.1912961210974455, - "grad_norm": 2.332047939300537, - "learning_rate": 1.3403670911087339e-05, - "loss": 1.1186, - "step": 1574 - }, - { - "epoch": 1.1920529801324504, - "grad_norm": 2.119150400161743, - "learning_rate": 1.339612114147917e-05, - "loss": 1.1748, - "step": 1575 - }, - { - "epoch": 1.192809839167455, - "grad_norm": 2.413939952850342, - "learning_rate": 1.3388569183427695e-05, - "loss": 1.178, - "step": 1576 - }, - { - "epoch": 1.1935666982024598, - "grad_norm": 2.265653371810913, - "learning_rate": 1.3381015041800787e-05, - "loss": 1.1532, - "step": 1577 - }, - { - "epoch": 1.1943235572374644, - "grad_norm": 2.1941394805908203, - "learning_rate": 1.3373458721467724e-05, - "loss": 1.1027, - "step": 1578 - }, - { - "epoch": 1.1950804162724693, - "grad_norm": 2.350780725479126, - "learning_rate": 1.3365900227299205e-05, - "loss": 1.1373, - "step": 1579 - }, - { - "epoch": 1.195837275307474, - "grad_norm": 2.401061773300171, - "learning_rate": 1.3358339564167313e-05, - "loss": 1.1602, - "step": 1580 - }, - { - "epoch": 1.1965941343424786, - "grad_norm": 2.3053834438323975, - "learning_rate": 1.3350776736945539e-05, - "loss": 1.0973, - "step": 1581 - }, - { - "epoch": 1.1973509933774835, - "grad_norm": 2.3348872661590576, - "learning_rate": 1.3343211750508769e-05, - "loss": 1.1439, - "step": 1582 - }, - { - "epoch": 1.1981078524124882, - "grad_norm": 2.255254030227661, - "learning_rate": 1.333564460973327e-05, - "loss": 1.1259, - "step": 1583 - }, - { - "epoch": 1.1988647114474928, - "grad_norm": 2.1117663383483887, - "learning_rate": 1.332807531949671e-05, - "loss": 1.1075, - "step": 1584 - }, - { - "epoch": 1.1996215704824977, - "grad_norm": 2.2909741401672363, - "learning_rate": 1.3320503884678141e-05, - "loss": 1.1518, - "step": 1585 - }, - { - "epoch": 1.2003784295175024, - "grad_norm": 2.2066426277160645, - "learning_rate": 1.331293031015799e-05, - "loss": 1.1617, - "step": 1586 - }, - { - "epoch": 1.201135288552507, - "grad_norm": 2.2523305416107178, - "learning_rate": 1.3305354600818068e-05, - "loss": 1.1961, - "step": 1587 - }, - { - "epoch": 1.201892147587512, - "grad_norm": 2.1978890895843506, - "learning_rate": 1.3297776761541566e-05, - "loss": 1.154, - "step": 1588 - }, - { - "epoch": 1.2026490066225166, - 
"grad_norm": 2.333961009979248, - "learning_rate": 1.3290196797213037e-05, - "loss": 1.1201, - "step": 1589 - }, - { - "epoch": 1.2034058656575213, - "grad_norm": 2.1890499591827393, - "learning_rate": 1.3282614712718412e-05, - "loss": 1.1166, - "step": 1590 - }, - { - "epoch": 1.204162724692526, - "grad_norm": 2.2715249061584473, - "learning_rate": 1.3275030512944995e-05, - "loss": 1.1702, - "step": 1591 - }, - { - "epoch": 1.2049195837275308, - "grad_norm": 2.378854513168335, - "learning_rate": 1.3267444202781434e-05, - "loss": 1.1674, - "step": 1592 - }, - { - "epoch": 1.2056764427625355, - "grad_norm": 2.085010290145874, - "learning_rate": 1.3259855787117758e-05, - "loss": 1.1709, - "step": 1593 - }, - { - "epoch": 1.2064333017975402, - "grad_norm": 2.3028149604797363, - "learning_rate": 1.3252265270845339e-05, - "loss": 1.1304, - "step": 1594 - }, - { - "epoch": 1.2071901608325448, - "grad_norm": 2.0950684547424316, - "learning_rate": 1.3244672658856908e-05, - "loss": 1.1585, - "step": 1595 - }, - { - "epoch": 1.2079470198675497, - "grad_norm": 2.2300803661346436, - "learning_rate": 1.3237077956046551e-05, - "loss": 1.1123, - "step": 1596 - }, - { - "epoch": 1.2087038789025544, - "grad_norm": 2.1364376544952393, - "learning_rate": 1.3229481167309692e-05, - "loss": 1.1112, - "step": 1597 - }, - { - "epoch": 1.209460737937559, - "grad_norm": 2.9876246452331543, - "learning_rate": 1.322188229754311e-05, - "loss": 1.1989, - "step": 1598 - }, - { - "epoch": 1.210217596972564, - "grad_norm": 2.1434969902038574, - "learning_rate": 1.3214281351644918e-05, - "loss": 1.1665, - "step": 1599 - }, - { - "epoch": 1.2109744560075686, - "grad_norm": 2.142533779144287, - "learning_rate": 1.3206678334514571e-05, - "loss": 1.1229, - "step": 1600 - }, - { - "epoch": 1.2117313150425733, - "grad_norm": 2.065274715423584, - "learning_rate": 1.3199073251052854e-05, - "loss": 1.1167, - "step": 1601 - }, - { - "epoch": 1.2124881740775781, - "grad_norm": 2.128526449203491, - "learning_rate": 1.3191466106161893e-05, - "loss": 1.1622, - "step": 1602 - }, - { - "epoch": 1.2132450331125828, - "grad_norm": 2.075362205505371, - "learning_rate": 1.3183856904745135e-05, - "loss": 1.1541, - "step": 1603 - }, - { - "epoch": 1.2140018921475875, - "grad_norm": 2.4913156032562256, - "learning_rate": 1.3176245651707357e-05, - "loss": 1.1635, - "step": 1604 - }, - { - "epoch": 1.2147587511825924, - "grad_norm": 2.1509463787078857, - "learning_rate": 1.3168632351954653e-05, - "loss": 1.1317, - "step": 1605 - }, - { - "epoch": 1.215515610217597, - "grad_norm": 2.2484796047210693, - "learning_rate": 1.3161017010394444e-05, - "loss": 1.1342, - "step": 1606 - }, - { - "epoch": 1.2162724692526017, - "grad_norm": 2.2622358798980713, - "learning_rate": 1.3153399631935463e-05, - "loss": 1.1416, - "step": 1607 - }, - { - "epoch": 1.2170293282876063, - "grad_norm": 2.4243550300598145, - "learning_rate": 1.3145780221487754e-05, - "loss": 1.1653, - "step": 1608 - }, - { - "epoch": 1.2177861873226112, - "grad_norm": 2.211627960205078, - "learning_rate": 1.3138158783962668e-05, - "loss": 1.177, - "step": 1609 - }, - { - "epoch": 1.218543046357616, - "grad_norm": 2.025865316390991, - "learning_rate": 1.3130535324272884e-05, - "loss": 1.1536, - "step": 1610 - }, - { - "epoch": 1.2192999053926206, - "grad_norm": 2.297100782394409, - "learning_rate": 1.3122909847332349e-05, - "loss": 1.2091, - "step": 1611 - }, - { - "epoch": 1.2200567644276255, - "grad_norm": 2.41648006439209, - "learning_rate": 1.3115282358056333e-05, - "loss": 1.183, - 
"step": 1612 - }, - { - "epoch": 1.2208136234626301, - "grad_norm": 2.1309831142425537, - "learning_rate": 1.3107652861361408e-05, - "loss": 1.1715, - "step": 1613 - }, - { - "epoch": 1.2215704824976348, - "grad_norm": 2.268522262573242, - "learning_rate": 1.3100021362165426e-05, - "loss": 1.1762, - "step": 1614 - }, - { - "epoch": 1.2223273415326394, - "grad_norm": 2.296552896499634, - "learning_rate": 1.3092387865387533e-05, - "loss": 1.104, - "step": 1615 - }, - { - "epoch": 1.2230842005676443, - "grad_norm": 2.1397440433502197, - "learning_rate": 1.3084752375948166e-05, - "loss": 1.1284, - "step": 1616 - }, - { - "epoch": 1.223841059602649, - "grad_norm": 2.097498893737793, - "learning_rate": 1.3077114898769048e-05, - "loss": 1.124, - "step": 1617 - }, - { - "epoch": 1.2245979186376537, - "grad_norm": 2.212064027786255, - "learning_rate": 1.3069475438773178e-05, - "loss": 1.1184, - "step": 1618 - }, - { - "epoch": 1.2253547776726585, - "grad_norm": 2.323784351348877, - "learning_rate": 1.3061834000884831e-05, - "loss": 1.1615, - "step": 1619 - }, - { - "epoch": 1.2261116367076632, - "grad_norm": 2.1432077884674072, - "learning_rate": 1.3054190590029572e-05, - "loss": 1.1564, - "step": 1620 - }, - { - "epoch": 1.2268684957426679, - "grad_norm": 2.3040294647216797, - "learning_rate": 1.3046545211134218e-05, - "loss": 1.1227, - "step": 1621 - }, - { - "epoch": 1.2276253547776728, - "grad_norm": 2.406848669052124, - "learning_rate": 1.3038897869126865e-05, - "loss": 1.1577, - "step": 1622 - }, - { - "epoch": 1.2283822138126774, - "grad_norm": 2.3050808906555176, - "learning_rate": 1.3031248568936877e-05, - "loss": 1.1924, - "step": 1623 - }, - { - "epoch": 1.229139072847682, - "grad_norm": 2.03425669670105, - "learning_rate": 1.3023597315494874e-05, - "loss": 1.1474, - "step": 1624 - }, - { - "epoch": 1.2298959318826868, - "grad_norm": 2.2921745777130127, - "learning_rate": 1.3015944113732734e-05, - "loss": 1.1815, - "step": 1625 - }, - { - "epoch": 1.2306527909176916, - "grad_norm": 2.248823881149292, - "learning_rate": 1.3008288968583603e-05, - "loss": 1.1482, - "step": 1626 - }, - { - "epoch": 1.2314096499526963, - "grad_norm": 2.3645107746124268, - "learning_rate": 1.3000631884981858e-05, - "loss": 1.1383, - "step": 1627 - }, - { - "epoch": 1.232166508987701, - "grad_norm": 2.0877134799957275, - "learning_rate": 1.2992972867863147e-05, - "loss": 1.2064, - "step": 1628 - }, - { - "epoch": 1.2329233680227059, - "grad_norm": 2.3611538410186768, - "learning_rate": 1.2985311922164359e-05, - "loss": 1.1582, - "step": 1629 - }, - { - "epoch": 1.2336802270577105, - "grad_norm": 2.087958335876465, - "learning_rate": 1.2977649052823616e-05, - "loss": 1.1553, - "step": 1630 - }, - { - "epoch": 1.2344370860927152, - "grad_norm": 2.2635905742645264, - "learning_rate": 1.2969984264780283e-05, - "loss": 1.1704, - "step": 1631 - }, - { - "epoch": 1.2351939451277199, - "grad_norm": 2.251215934753418, - "learning_rate": 1.2962317562974976e-05, - "loss": 1.1855, - "step": 1632 - }, - { - "epoch": 1.2359508041627247, - "grad_norm": 3.073653221130371, - "learning_rate": 1.2954648952349527e-05, - "loss": 1.1935, - "step": 1633 - }, - { - "epoch": 1.2367076631977294, - "grad_norm": 2.2269108295440674, - "learning_rate": 1.2946978437847004e-05, - "loss": 1.146, - "step": 1634 - }, - { - "epoch": 1.237464522232734, - "grad_norm": 2.4930577278137207, - "learning_rate": 1.2939306024411713e-05, - "loss": 1.1703, - "step": 1635 - }, - { - "epoch": 1.238221381267739, - "grad_norm": 2.6076838970184326, - 
"learning_rate": 1.2931631716989166e-05, - "loss": 1.1756, - "step": 1636 - }, - { - "epoch": 1.2389782403027436, - "grad_norm": 2.250298023223877, - "learning_rate": 1.292395552052611e-05, - "loss": 1.1607, - "step": 1637 - }, - { - "epoch": 1.2397350993377483, - "grad_norm": 2.2543821334838867, - "learning_rate": 1.29162774399705e-05, - "loss": 1.1653, - "step": 1638 - }, - { - "epoch": 1.2404919583727532, - "grad_norm": 2.1660284996032715, - "learning_rate": 1.290859748027151e-05, - "loss": 1.0827, - "step": 1639 - }, - { - "epoch": 1.2412488174077578, - "grad_norm": 2.392023801803589, - "learning_rate": 1.2900915646379524e-05, - "loss": 1.1651, - "step": 1640 - }, - { - "epoch": 1.2420056764427625, - "grad_norm": 2.147473096847534, - "learning_rate": 1.2893231943246143e-05, - "loss": 1.1012, - "step": 1641 - }, - { - "epoch": 1.2427625354777674, - "grad_norm": 2.2261102199554443, - "learning_rate": 1.2885546375824154e-05, - "loss": 1.1313, - "step": 1642 - }, - { - "epoch": 1.243519394512772, - "grad_norm": 2.1518611907958984, - "learning_rate": 1.2877858949067564e-05, - "loss": 1.1309, - "step": 1643 - }, - { - "epoch": 1.2442762535477767, - "grad_norm": 2.2095835208892822, - "learning_rate": 1.2870169667931567e-05, - "loss": 1.109, - "step": 1644 - }, - { - "epoch": 1.2450331125827814, - "grad_norm": 2.3556344509124756, - "learning_rate": 1.2862478537372556e-05, - "loss": 1.0937, - "step": 1645 - }, - { - "epoch": 1.2457899716177863, - "grad_norm": 2.178274154663086, - "learning_rate": 1.2854785562348121e-05, - "loss": 1.1857, - "step": 1646 - }, - { - "epoch": 1.246546830652791, - "grad_norm": 2.1965596675872803, - "learning_rate": 1.2847090747817033e-05, - "loss": 1.1292, - "step": 1647 - }, - { - "epoch": 1.2473036896877956, - "grad_norm": 2.359292984008789, - "learning_rate": 1.2839394098739257e-05, - "loss": 1.1753, - "step": 1648 - }, - { - "epoch": 1.2480605487228003, - "grad_norm": 2.189749002456665, - "learning_rate": 1.2831695620075926e-05, - "loss": 1.0852, - "step": 1649 - }, - { - "epoch": 1.2488174077578051, - "grad_norm": 2.1658499240875244, - "learning_rate": 1.2823995316789366e-05, - "loss": 1.1278, - "step": 1650 - }, - { - "epoch": 1.2495742667928098, - "grad_norm": 2.249729871749878, - "learning_rate": 1.281629319384308e-05, - "loss": 1.1829, - "step": 1651 - }, - { - "epoch": 1.2503311258278145, - "grad_norm": 2.2419471740722656, - "learning_rate": 1.2808589256201735e-05, - "loss": 1.1373, - "step": 1652 - }, - { - "epoch": 1.2510879848628194, - "grad_norm": 2.296644449234009, - "learning_rate": 1.280088350883117e-05, - "loss": 1.1724, - "step": 1653 - }, - { - "epoch": 1.251844843897824, - "grad_norm": 2.77717661857605, - "learning_rate": 1.2793175956698398e-05, - "loss": 1.1367, - "step": 1654 - }, - { - "epoch": 1.2526017029328287, - "grad_norm": 2.4535298347473145, - "learning_rate": 1.2785466604771584e-05, - "loss": 1.1874, - "step": 1655 - }, - { - "epoch": 1.2533585619678336, - "grad_norm": 2.1663715839385986, - "learning_rate": 1.2777755458020058e-05, - "loss": 1.1819, - "step": 1656 - }, - { - "epoch": 1.2541154210028382, - "grad_norm": 2.467954158782959, - "learning_rate": 1.2770042521414314e-05, - "loss": 1.1761, - "step": 1657 - }, - { - "epoch": 1.254872280037843, - "grad_norm": 2.3556721210479736, - "learning_rate": 1.2762327799925991e-05, - "loss": 1.1574, - "step": 1658 - }, - { - "epoch": 1.2556291390728478, - "grad_norm": 2.204136371612549, - "learning_rate": 1.2754611298527875e-05, - "loss": 1.0962, - "step": 1659 - }, - { - "epoch": 
1.2563859981078525, - "grad_norm": 2.3233225345611572, - "learning_rate": 1.274689302219391e-05, - "loss": 1.1369, - "step": 1660 - }, - { - "epoch": 1.2571428571428571, - "grad_norm": 2.5201222896575928, - "learning_rate": 1.2739172975899181e-05, - "loss": 1.1593, - "step": 1661 - }, - { - "epoch": 1.257899716177862, - "grad_norm": 2.531087875366211, - "learning_rate": 1.273145116461991e-05, - "loss": 1.1411, - "step": 1662 - }, - { - "epoch": 1.2586565752128667, - "grad_norm": 2.510352373123169, - "learning_rate": 1.2723727593333454e-05, - "loss": 1.1426, - "step": 1663 - }, - { - "epoch": 1.2594134342478713, - "grad_norm": 2.217392921447754, - "learning_rate": 1.2716002267018314e-05, - "loss": 1.0712, - "step": 1664 - }, - { - "epoch": 1.260170293282876, - "grad_norm": 2.3988654613494873, - "learning_rate": 1.2708275190654126e-05, - "loss": 1.1884, - "step": 1665 - }, - { - "epoch": 1.2609271523178807, - "grad_norm": 2.151139259338379, - "learning_rate": 1.2700546369221628e-05, - "loss": 1.095, - "step": 1666 - }, - { - "epoch": 1.2616840113528855, - "grad_norm": 2.3287789821624756, - "learning_rate": 1.2692815807702711e-05, - "loss": 1.1726, - "step": 1667 - }, - { - "epoch": 1.2624408703878902, - "grad_norm": 2.1874783039093018, - "learning_rate": 1.268508351108038e-05, - "loss": 1.1389, - "step": 1668 - }, - { - "epoch": 1.2631977294228949, - "grad_norm": 2.501871347427368, - "learning_rate": 1.2677349484338747e-05, - "loss": 1.1717, - "step": 1669 - }, - { - "epoch": 1.2639545884578998, - "grad_norm": 2.2890784740448, - "learning_rate": 1.2669613732463053e-05, - "loss": 1.1069, - "step": 1670 - }, - { - "epoch": 1.2647114474929044, - "grad_norm": 2.41701340675354, - "learning_rate": 1.2661876260439642e-05, - "loss": 1.142, - "step": 1671 - }, - { - "epoch": 1.265468306527909, - "grad_norm": 2.5459794998168945, - "learning_rate": 1.2654137073255976e-05, - "loss": 1.1071, - "step": 1672 - }, - { - "epoch": 1.266225165562914, - "grad_norm": 2.1220319271087646, - "learning_rate": 1.2646396175900612e-05, - "loss": 1.1644, - "step": 1673 - }, - { - "epoch": 1.2669820245979186, - "grad_norm": 2.383187770843506, - "learning_rate": 1.2638653573363215e-05, - "loss": 1.1515, - "step": 1674 - }, - { - "epoch": 1.2677388836329233, - "grad_norm": 2.090808868408203, - "learning_rate": 1.2630909270634554e-05, - "loss": 1.1151, - "step": 1675 - }, - { - "epoch": 1.2684957426679282, - "grad_norm": 2.306619882583618, - "learning_rate": 1.2623163272706483e-05, - "loss": 1.177, - "step": 1676 - }, - { - "epoch": 1.2692526017029329, - "grad_norm": 2.4056408405303955, - "learning_rate": 1.261541558457195e-05, - "loss": 1.1811, - "step": 1677 - }, - { - "epoch": 1.2700094607379375, - "grad_norm": 2.4404773712158203, - "learning_rate": 1.2607666211225002e-05, - "loss": 1.1686, - "step": 1678 - }, - { - "epoch": 1.2707663197729424, - "grad_norm": 2.1623356342315674, - "learning_rate": 1.2599915157660776e-05, - "loss": 1.1472, - "step": 1679 - }, - { - "epoch": 1.271523178807947, - "grad_norm": 2.5244863033294678, - "learning_rate": 1.2592162428875465e-05, - "loss": 1.1093, - "step": 1680 - }, - { - "epoch": 1.2722800378429517, - "grad_norm": 2.649132251739502, - "learning_rate": 1.2584408029866373e-05, - "loss": 1.1644, - "step": 1681 - }, - { - "epoch": 1.2730368968779564, - "grad_norm": 2.245384931564331, - "learning_rate": 1.2576651965631862e-05, - "loss": 1.2137, - "step": 1682 - }, - { - "epoch": 1.2737937559129613, - "grad_norm": 3.0994908809661865, - "learning_rate": 1.256889424117137e-05, - "loss": 
1.1189, - "step": 1683 - }, - { - "epoch": 1.274550614947966, - "grad_norm": 2.228210210800171, - "learning_rate": 1.2561134861485413e-05, - "loss": 1.1694, - "step": 1684 - }, - { - "epoch": 1.2753074739829706, - "grad_norm": 2.0974786281585693, - "learning_rate": 1.2553373831575572e-05, - "loss": 1.1661, - "step": 1685 - }, - { - "epoch": 1.2760643330179753, - "grad_norm": 2.1458041667938232, - "learning_rate": 1.2545611156444477e-05, - "loss": 1.0814, - "step": 1686 - }, - { - "epoch": 1.2768211920529802, - "grad_norm": 2.163115978240967, - "learning_rate": 1.253784684109584e-05, - "loss": 1.1643, - "step": 1687 - }, - { - "epoch": 1.2775780510879848, - "grad_norm": 2.122997999191284, - "learning_rate": 1.2530080890534416e-05, - "loss": 1.172, - "step": 1688 - }, - { - "epoch": 1.2783349101229895, - "grad_norm": 2.4748451709747314, - "learning_rate": 1.2522313309766021e-05, - "loss": 1.1489, - "step": 1689 - }, - { - "epoch": 1.2790917691579944, - "grad_norm": 2.201387882232666, - "learning_rate": 1.2514544103797517e-05, - "loss": 1.1509, - "step": 1690 - }, - { - "epoch": 1.279848628192999, - "grad_norm": 2.158069610595703, - "learning_rate": 1.2506773277636812e-05, - "loss": 1.1284, - "step": 1691 - }, - { - "epoch": 1.2806054872280037, - "grad_norm": 2.192920684814453, - "learning_rate": 1.2499000836292875e-05, - "loss": 1.156, - "step": 1692 - }, - { - "epoch": 1.2813623462630086, - "grad_norm": 2.266641855239868, - "learning_rate": 1.2491226784775685e-05, - "loss": 1.1298, - "step": 1693 - }, - { - "epoch": 1.2821192052980133, - "grad_norm": 2.677654981613159, - "learning_rate": 1.2483451128096289e-05, - "loss": 1.1472, - "step": 1694 - }, - { - "epoch": 1.282876064333018, - "grad_norm": 2.4137139320373535, - "learning_rate": 1.2475673871266756e-05, - "loss": 1.075, - "step": 1695 - }, - { - "epoch": 1.2836329233680228, - "grad_norm": 2.201813220977783, - "learning_rate": 1.2467895019300187e-05, - "loss": 1.1224, - "step": 1696 - }, - { - "epoch": 1.2843897824030275, - "grad_norm": 2.1659185886383057, - "learning_rate": 1.2460114577210703e-05, - "loss": 1.1606, - "step": 1697 - }, - { - "epoch": 1.2851466414380321, - "grad_norm": 2.3215322494506836, - "learning_rate": 1.245233255001347e-05, - "loss": 1.1408, - "step": 1698 - }, - { - "epoch": 1.285903500473037, - "grad_norm": 2.530764579772949, - "learning_rate": 1.2444548942724657e-05, - "loss": 1.1433, - "step": 1699 - }, - { - "epoch": 1.2866603595080417, - "grad_norm": 2.5110771656036377, - "learning_rate": 1.2436763760361461e-05, - "loss": 1.1644, - "step": 1700 - }, - { - "epoch": 1.2874172185430464, - "grad_norm": 2.0336203575134277, - "learning_rate": 1.2428977007942092e-05, - "loss": 1.1538, - "step": 1701 - }, - { - "epoch": 1.288174077578051, - "grad_norm": 2.3316948413848877, - "learning_rate": 1.2421188690485767e-05, - "loss": 1.1349, - "step": 1702 - }, - { - "epoch": 1.2889309366130557, - "grad_norm": 2.151745319366455, - "learning_rate": 1.241339881301273e-05, - "loss": 1.1464, - "step": 1703 - }, - { - "epoch": 1.2896877956480606, - "grad_norm": 2.1439285278320312, - "learning_rate": 1.2405607380544198e-05, - "loss": 1.1384, - "step": 1704 - }, - { - "epoch": 1.2904446546830652, - "grad_norm": 2.0676236152648926, - "learning_rate": 1.239781439810242e-05, - "loss": 1.1315, - "step": 1705 - }, - { - "epoch": 1.29120151371807, - "grad_norm": 2.1534860134124756, - "learning_rate": 1.239001987071064e-05, - "loss": 1.1232, - "step": 1706 - }, - { - "epoch": 1.2919583727530748, - "grad_norm": 2.4337046146392822, - 
"learning_rate": 1.238222380339308e-05, - "loss": 1.1637, - "step": 1707 - }, - { - "epoch": 1.2927152317880795, - "grad_norm": 3.3103768825531006, - "learning_rate": 1.2374426201174974e-05, - "loss": 1.1255, - "step": 1708 - }, - { - "epoch": 1.2934720908230841, - "grad_norm": 2.3964853286743164, - "learning_rate": 1.2366627069082533e-05, - "loss": 1.1474, - "step": 1709 - }, - { - "epoch": 1.294228949858089, - "grad_norm": 2.196171760559082, - "learning_rate": 1.235882641214296e-05, - "loss": 1.1152, - "step": 1710 - }, - { - "epoch": 1.2949858088930937, - "grad_norm": 2.2231311798095703, - "learning_rate": 1.2351024235384435e-05, - "loss": 1.0872, - "step": 1711 - }, - { - "epoch": 1.2957426679280983, - "grad_norm": 3.2890310287475586, - "learning_rate": 1.2343220543836132e-05, - "loss": 1.1376, - "step": 1712 - }, - { - "epoch": 1.2964995269631032, - "grad_norm": 2.37038516998291, - "learning_rate": 1.2335415342528186e-05, - "loss": 1.1309, - "step": 1713 - }, - { - "epoch": 1.2972563859981079, - "grad_norm": 2.0955164432525635, - "learning_rate": 1.2327608636491706e-05, - "loss": 1.1721, - "step": 1714 - }, - { - "epoch": 1.2980132450331126, - "grad_norm": 2.3492562770843506, - "learning_rate": 1.2319800430758787e-05, - "loss": 1.1855, - "step": 1715 - }, - { - "epoch": 1.2987701040681174, - "grad_norm": 2.366028308868408, - "learning_rate": 1.231199073036247e-05, - "loss": 1.1547, - "step": 1716 - }, - { - "epoch": 1.299526963103122, - "grad_norm": 2.163280725479126, - "learning_rate": 1.230417954033677e-05, - "loss": 1.1289, - "step": 1717 - }, - { - "epoch": 1.3002838221381268, - "grad_norm": 2.1231632232666016, - "learning_rate": 1.2296366865716663e-05, - "loss": 1.1386, - "step": 1718 - }, - { - "epoch": 1.3010406811731314, - "grad_norm": 2.1293210983276367, - "learning_rate": 1.2288552711538076e-05, - "loss": 1.185, - "step": 1719 - }, - { - "epoch": 1.3017975402081363, - "grad_norm": 2.0795953273773193, - "learning_rate": 1.2280737082837903e-05, - "loss": 1.0935, - "step": 1720 - }, - { - "epoch": 1.302554399243141, - "grad_norm": 2.1011762619018555, - "learning_rate": 1.2272919984653972e-05, - "loss": 1.1672, - "step": 1721 - }, - { - "epoch": 1.3033112582781456, - "grad_norm": 2.221156120300293, - "learning_rate": 1.2265101422025064e-05, - "loss": 1.1073, - "step": 1722 - }, - { - "epoch": 1.3040681173131503, - "grad_norm": 2.2249984741210938, - "learning_rate": 1.2257281399990913e-05, - "loss": 1.1659, - "step": 1723 - }, - { - "epoch": 1.3048249763481552, - "grad_norm": 4.573660850524902, - "learning_rate": 1.2249459923592176e-05, - "loss": 1.1835, - "step": 1724 - }, - { - "epoch": 1.3055818353831599, - "grad_norm": 2.1640846729278564, - "learning_rate": 1.2241636997870459e-05, - "loss": 1.1342, - "step": 1725 - }, - { - "epoch": 1.3063386944181645, - "grad_norm": 2.010333299636841, - "learning_rate": 1.223381262786831e-05, - "loss": 1.1, - "step": 1726 - }, - { - "epoch": 1.3070955534531694, - "grad_norm": 2.026108980178833, - "learning_rate": 1.2225986818629188e-05, - "loss": 1.1424, - "step": 1727 - }, - { - "epoch": 1.307852412488174, - "grad_norm": 2.0564801692962646, - "learning_rate": 1.22181595751975e-05, - "loss": 1.1336, - "step": 1728 - }, - { - "epoch": 1.3086092715231787, - "grad_norm": 1.9752734899520874, - "learning_rate": 1.2210330902618555e-05, - "loss": 1.1617, - "step": 1729 - }, - { - "epoch": 1.3093661305581836, - "grad_norm": 2.1132519245147705, - "learning_rate": 1.2202500805938606e-05, - "loss": 1.1841, - "step": 1730 - }, - { - "epoch": 
1.3101229895931883, - "grad_norm": 2.167475461959839, - "learning_rate": 1.2194669290204813e-05, - "loss": 1.1255, - "step": 1731 - }, - { - "epoch": 1.310879848628193, - "grad_norm": 2.1125247478485107, - "learning_rate": 1.2186836360465249e-05, - "loss": 1.1434, - "step": 1732 - }, - { - "epoch": 1.3116367076631978, - "grad_norm": 2.126776933670044, - "learning_rate": 1.21790020217689e-05, - "loss": 1.1626, - "step": 1733 - }, - { - "epoch": 1.3123935666982025, - "grad_norm": 2.1454262733459473, - "learning_rate": 1.2171166279165668e-05, - "loss": 1.116, - "step": 1734 - }, - { - "epoch": 1.3131504257332072, - "grad_norm": 2.166027545928955, - "learning_rate": 1.216332913770634e-05, - "loss": 1.145, - "step": 1735 - }, - { - "epoch": 1.313907284768212, - "grad_norm": 2.0442612171173096, - "learning_rate": 1.2155490602442628e-05, - "loss": 1.1443, - "step": 1736 - }, - { - "epoch": 1.3146641438032167, - "grad_norm": 2.2408742904663086, - "learning_rate": 1.2147650678427136e-05, - "loss": 1.1297, - "step": 1737 - }, - { - "epoch": 1.3154210028382214, - "grad_norm": 2.039287805557251, - "learning_rate": 1.213980937071335e-05, - "loss": 1.1183, - "step": 1738 - }, - { - "epoch": 1.316177861873226, - "grad_norm": 2.4958298206329346, - "learning_rate": 1.213196668435566e-05, - "loss": 1.1127, - "step": 1739 - }, - { - "epoch": 1.3169347209082307, - "grad_norm": 2.0684995651245117, - "learning_rate": 1.212412262440935e-05, - "loss": 1.1092, - "step": 1740 - }, - { - "epoch": 1.3176915799432356, - "grad_norm": 2.2518489360809326, - "learning_rate": 1.2116277195930566e-05, - "loss": 1.1256, - "step": 1741 - }, - { - "epoch": 1.3184484389782403, - "grad_norm": 2.2096285820007324, - "learning_rate": 1.2108430403976363e-05, - "loss": 1.1785, - "step": 1742 - }, - { - "epoch": 1.319205298013245, - "grad_norm": 2.1161551475524902, - "learning_rate": 1.2100582253604663e-05, - "loss": 1.1816, - "step": 1743 - }, - { - "epoch": 1.3199621570482498, - "grad_norm": 2.2261106967926025, - "learning_rate": 1.2092732749874258e-05, - "loss": 1.1512, - "step": 1744 - }, - { - "epoch": 1.3207190160832545, - "grad_norm": 2.4035398960113525, - "learning_rate": 1.2084881897844827e-05, - "loss": 1.1505, - "step": 1745 - }, - { - "epoch": 1.3214758751182591, - "grad_norm": 2.2090861797332764, - "learning_rate": 1.2077029702576898e-05, - "loss": 1.1521, - "step": 1746 - }, - { - "epoch": 1.322232734153264, - "grad_norm": 2.054429769515991, - "learning_rate": 1.2069176169131889e-05, - "loss": 1.1863, - "step": 1747 - }, - { - "epoch": 1.3229895931882687, - "grad_norm": 2.0456814765930176, - "learning_rate": 1.2061321302572063e-05, - "loss": 1.1125, - "step": 1748 - }, - { - "epoch": 1.3237464522232734, - "grad_norm": 2.066861629486084, - "learning_rate": 1.2053465107960536e-05, - "loss": 1.1107, - "step": 1749 - }, - { - "epoch": 1.3245033112582782, - "grad_norm": 2.2116339206695557, - "learning_rate": 1.204560759036131e-05, - "loss": 1.1796, - "step": 1750 - }, - { - "epoch": 1.325260170293283, - "grad_norm": 2.355694532394409, - "learning_rate": 1.203774875483921e-05, - "loss": 1.1221, - "step": 1751 - }, - { - "epoch": 1.3260170293282876, - "grad_norm": 2.3318047523498535, - "learning_rate": 1.202988860645992e-05, - "loss": 1.1482, - "step": 1752 - }, - { - "epoch": 1.3267738883632925, - "grad_norm": 2.3973910808563232, - "learning_rate": 1.202202715028998e-05, - "loss": 1.09, - "step": 1753 - }, - { - "epoch": 1.3275307473982971, - "grad_norm": 2.3162357807159424, - "learning_rate": 1.2014164391396761e-05, - "loss": 
1.1362, - "step": 1754 - }, - { - "epoch": 1.3282876064333018, - "grad_norm": 2.232311964035034, - "learning_rate": 1.2006300334848472e-05, - "loss": 1.1419, - "step": 1755 - }, - { - "epoch": 1.3290444654683065, - "grad_norm": 2.3239498138427734, - "learning_rate": 1.1998434985714172e-05, - "loss": 1.151, - "step": 1756 - }, - { - "epoch": 1.3298013245033111, - "grad_norm": 2.5459787845611572, - "learning_rate": 1.1990568349063742e-05, - "loss": 1.1649, - "step": 1757 - }, - { - "epoch": 1.330558183538316, - "grad_norm": 2.184105396270752, - "learning_rate": 1.1982700429967893e-05, - "loss": 1.1334, - "step": 1758 - }, - { - "epoch": 1.3313150425733207, - "grad_norm": 2.092010498046875, - "learning_rate": 1.1974831233498175e-05, - "loss": 1.0945, - "step": 1759 - }, - { - "epoch": 1.3320719016083253, - "grad_norm": 2.204160690307617, - "learning_rate": 1.1966960764726937e-05, - "loss": 1.1411, - "step": 1760 - }, - { - "epoch": 1.3328287606433302, - "grad_norm": 2.467329978942871, - "learning_rate": 1.195908902872738e-05, - "loss": 1.1259, - "step": 1761 - }, - { - "epoch": 1.3335856196783349, - "grad_norm": 2.2322754859924316, - "learning_rate": 1.1951216030573489e-05, - "loss": 1.1204, - "step": 1762 - }, - { - "epoch": 1.3343424787133396, - "grad_norm": 2.1422557830810547, - "learning_rate": 1.1943341775340087e-05, - "loss": 1.1306, - "step": 1763 - }, - { - "epoch": 1.3350993377483444, - "grad_norm": 2.393411159515381, - "learning_rate": 1.1935466268102802e-05, - "loss": 1.1409, - "step": 1764 - }, - { - "epoch": 1.335856196783349, - "grad_norm": 2.2010276317596436, - "learning_rate": 1.192758951393806e-05, - "loss": 1.0952, - "step": 1765 - }, - { - "epoch": 1.3366130558183538, - "grad_norm": 2.128002166748047, - "learning_rate": 1.1919711517923095e-05, - "loss": 1.1084, - "step": 1766 - }, - { - "epoch": 1.3373699148533587, - "grad_norm": 2.090876340866089, - "learning_rate": 1.1911832285135953e-05, - "loss": 1.1409, - "step": 1767 - }, - { - "epoch": 1.3381267738883633, - "grad_norm": 2.232081890106201, - "learning_rate": 1.1903951820655458e-05, - "loss": 1.176, - "step": 1768 - }, - { - "epoch": 1.338883632923368, - "grad_norm": 2.2187860012054443, - "learning_rate": 1.1896070129561237e-05, - "loss": 1.1094, - "step": 1769 - }, - { - "epoch": 1.3396404919583729, - "grad_norm": 2.2788565158843994, - "learning_rate": 1.1888187216933715e-05, - "loss": 1.1302, - "step": 1770 - }, - { - "epoch": 1.3403973509933775, - "grad_norm": 2.153656482696533, - "learning_rate": 1.1880303087854093e-05, - "loss": 1.1742, - "step": 1771 - }, - { - "epoch": 1.3411542100283822, - "grad_norm": 2.2120232582092285, - "learning_rate": 1.187241774740436e-05, - "loss": 1.1553, - "step": 1772 - }, - { - "epoch": 1.3419110690633869, - "grad_norm": 2.09063720703125, - "learning_rate": 1.1864531200667283e-05, - "loss": 1.1231, - "step": 1773 - }, - { - "epoch": 1.3426679280983917, - "grad_norm": 2.186126232147217, - "learning_rate": 1.1856643452726417e-05, - "loss": 1.1121, - "step": 1774 - }, - { - "epoch": 1.3434247871333964, - "grad_norm": 2.706040620803833, - "learning_rate": 1.1848754508666084e-05, - "loss": 1.1323, - "step": 1775 - }, - { - "epoch": 1.344181646168401, - "grad_norm": 2.1138103008270264, - "learning_rate": 1.1840864373571368e-05, - "loss": 1.1255, - "step": 1776 - }, - { - "epoch": 1.3449385052034057, - "grad_norm": 2.1181037425994873, - "learning_rate": 1.1832973052528136e-05, - "loss": 1.1406, - "step": 1777 - }, - { - "epoch": 1.3456953642384106, - "grad_norm": 2.0773799419403076, - 
"learning_rate": 1.1825080550623014e-05, - "loss": 1.1159, - "step": 1778 - }, - { - "epoch": 1.3464522232734153, - "grad_norm": 2.3848013877868652, - "learning_rate": 1.1817186872943385e-05, - "loss": 1.1687, - "step": 1779 - }, - { - "epoch": 1.34720908230842, - "grad_norm": 2.052957534790039, - "learning_rate": 1.1809292024577397e-05, - "loss": 1.176, - "step": 1780 - }, - { - "epoch": 1.3479659413434248, - "grad_norm": 2.1066739559173584, - "learning_rate": 1.1801396010613947e-05, - "loss": 1.1563, - "step": 1781 - }, - { - "epoch": 1.3487228003784295, - "grad_norm": 2.2263689041137695, - "learning_rate": 1.1793498836142685e-05, - "loss": 1.1763, - "step": 1782 - }, - { - "epoch": 1.3494796594134342, - "grad_norm": 2.263129711151123, - "learning_rate": 1.178560050625401e-05, - "loss": 1.1737, - "step": 1783 - }, - { - "epoch": 1.350236518448439, - "grad_norm": 2.3742623329162598, - "learning_rate": 1.1777701026039063e-05, - "loss": 1.123, - "step": 1784 - }, - { - "epoch": 1.3509933774834437, - "grad_norm": 2.1886773109436035, - "learning_rate": 1.1769800400589733e-05, - "loss": 1.1635, - "step": 1785 - }, - { - "epoch": 1.3517502365184484, - "grad_norm": 2.190129041671753, - "learning_rate": 1.1761898634998635e-05, - "loss": 1.1131, - "step": 1786 - }, - { - "epoch": 1.3525070955534533, - "grad_norm": 2.258070468902588, - "learning_rate": 1.1753995734359131e-05, - "loss": 1.1794, - "step": 1787 - }, - { - "epoch": 1.353263954588458, - "grad_norm": 2.0639896392822266, - "learning_rate": 1.1746091703765316e-05, - "loss": 1.1047, - "step": 1788 - }, - { - "epoch": 1.3540208136234626, - "grad_norm": 2.4623801708221436, - "learning_rate": 1.1738186548311998e-05, - "loss": 1.1642, - "step": 1789 - }, - { - "epoch": 1.3547776726584675, - "grad_norm": 2.082294225692749, - "learning_rate": 1.1730280273094724e-05, - "loss": 1.1374, - "step": 1790 - }, - { - "epoch": 1.3555345316934722, - "grad_norm": 1.9872076511383057, - "learning_rate": 1.1722372883209758e-05, - "loss": 1.1305, - "step": 1791 - }, - { - "epoch": 1.3562913907284768, - "grad_norm": 2.2372727394104004, - "learning_rate": 1.1714464383754085e-05, - "loss": 1.1261, - "step": 1792 - }, - { - "epoch": 1.3570482497634815, - "grad_norm": 2.16003680229187, - "learning_rate": 1.1706554779825399e-05, - "loss": 1.1289, - "step": 1793 - }, - { - "epoch": 1.3578051087984861, - "grad_norm": 2.2329182624816895, - "learning_rate": 1.1698644076522112e-05, - "loss": 1.1331, - "step": 1794 - }, - { - "epoch": 1.358561967833491, - "grad_norm": 2.2425284385681152, - "learning_rate": 1.1690732278943344e-05, - "loss": 1.2247, - "step": 1795 - }, - { - "epoch": 1.3593188268684957, - "grad_norm": 2.589672803878784, - "learning_rate": 1.1682819392188917e-05, - "loss": 1.144, - "step": 1796 - }, - { - "epoch": 1.3600756859035004, - "grad_norm": 2.2635231018066406, - "learning_rate": 1.1674905421359358e-05, - "loss": 1.1585, - "step": 1797 - }, - { - "epoch": 1.3608325449385053, - "grad_norm": 2.291184425354004, - "learning_rate": 1.1666990371555893e-05, - "loss": 1.1063, - "step": 1798 - }, - { - "epoch": 1.36158940397351, - "grad_norm": 2.289581298828125, - "learning_rate": 1.1659074247880442e-05, - "loss": 1.183, - "step": 1799 - }, - { - "epoch": 1.3623462630085146, - "grad_norm": 2.1125857830047607, - "learning_rate": 1.1651157055435616e-05, - "loss": 1.1226, - "step": 1800 - }, - { - "epoch": 1.3631031220435195, - "grad_norm": 2.1084022521972656, - "learning_rate": 1.1643238799324714e-05, - "loss": 1.1741, - "step": 1801 - }, - { - "epoch": 
1.3638599810785241, - "grad_norm": 2.2463252544403076, - "learning_rate": 1.1635319484651733e-05, - "loss": 1.1459, - "step": 1802 - }, - { - "epoch": 1.3646168401135288, - "grad_norm": 2.2021613121032715, - "learning_rate": 1.1627399116521334e-05, - "loss": 1.1939, - "step": 1803 - }, - { - "epoch": 1.3653736991485337, - "grad_norm": 2.190654754638672, - "learning_rate": 1.1619477700038863e-05, - "loss": 1.0967, - "step": 1804 - }, - { - "epoch": 1.3661305581835383, - "grad_norm": 1.9912575483322144, - "learning_rate": 1.1611555240310356e-05, - "loss": 1.1268, - "step": 1805 - }, - { - "epoch": 1.366887417218543, - "grad_norm": 2.1702189445495605, - "learning_rate": 1.16036317424425e-05, - "loss": 1.1957, - "step": 1806 - }, - { - "epoch": 1.367644276253548, - "grad_norm": 2.0921695232391357, - "learning_rate": 1.1595707211542662e-05, - "loss": 1.1161, - "step": 1807 - }, - { - "epoch": 1.3684011352885526, - "grad_norm": 2.1319305896759033, - "learning_rate": 1.1587781652718877e-05, - "loss": 1.1411, - "step": 1808 - }, - { - "epoch": 1.3691579943235572, - "grad_norm": 2.2225658893585205, - "learning_rate": 1.1579855071079838e-05, - "loss": 1.1259, - "step": 1809 - }, - { - "epoch": 1.369914853358562, - "grad_norm": 1.943051815032959, - "learning_rate": 1.1571927471734894e-05, - "loss": 1.1088, - "step": 1810 - }, - { - "epoch": 1.3706717123935668, - "grad_norm": 2.3888943195343018, - "learning_rate": 1.156399885979406e-05, - "loss": 1.1416, - "step": 1811 - }, - { - "epoch": 1.3714285714285714, - "grad_norm": 2.145301580429077, - "learning_rate": 1.1556069240368002e-05, - "loss": 1.1412, - "step": 1812 - }, - { - "epoch": 1.372185430463576, - "grad_norm": 2.0973587036132812, - "learning_rate": 1.1548138618568023e-05, - "loss": 1.1358, - "step": 1813 - }, - { - "epoch": 1.3729422894985808, - "grad_norm": 2.2998507022857666, - "learning_rate": 1.1540206999506086e-05, - "loss": 1.152, - "step": 1814 - }, - { - "epoch": 1.3736991485335857, - "grad_norm": 2.1464428901672363, - "learning_rate": 1.1532274388294789e-05, - "loss": 1.105, - "step": 1815 - }, - { - "epoch": 1.3744560075685903, - "grad_norm": 2.0508806705474854, - "learning_rate": 1.152434079004738e-05, - "loss": 1.1425, - "step": 1816 - }, - { - "epoch": 1.375212866603595, - "grad_norm": 2.05656099319458, - "learning_rate": 1.151640620987772e-05, - "loss": 1.1042, - "step": 1817 - }, - { - "epoch": 1.3759697256385999, - "grad_norm": 2.153604030609131, - "learning_rate": 1.1508470652900332e-05, - "loss": 1.1361, - "step": 1818 - }, - { - "epoch": 1.3767265846736045, - "grad_norm": 2.9740560054779053, - "learning_rate": 1.1500534124230354e-05, - "loss": 1.1646, - "step": 1819 - }, - { - "epoch": 1.3774834437086092, - "grad_norm": 2.213672637939453, - "learning_rate": 1.149259662898354e-05, - "loss": 1.1348, - "step": 1820 - }, - { - "epoch": 1.378240302743614, - "grad_norm": 2.2703373432159424, - "learning_rate": 1.148465817227629e-05, - "loss": 1.1456, - "step": 1821 - }, - { - "epoch": 1.3789971617786188, - "grad_norm": 2.1815407276153564, - "learning_rate": 1.1476718759225611e-05, - "loss": 1.16, - "step": 1822 - }, - { - "epoch": 1.3797540208136234, - "grad_norm": 2.198272943496704, - "learning_rate": 1.1468778394949123e-05, - "loss": 1.1677, - "step": 1823 - }, - { - "epoch": 1.3805108798486283, - "grad_norm": 2.1629281044006348, - "learning_rate": 1.1460837084565064e-05, - "loss": 1.1269, - "step": 1824 - }, - { - "epoch": 1.381267738883633, - "grad_norm": 1.9979993104934692, - "learning_rate": 1.1452894833192287e-05, - "loss": 
1.1243, - "step": 1825 - }, - { - "epoch": 1.3820245979186376, - "grad_norm": 2.1406540870666504, - "learning_rate": 1.144495164595024e-05, - "loss": 1.1819, - "step": 1826 - }, - { - "epoch": 1.3827814569536425, - "grad_norm": 2.2074644565582275, - "learning_rate": 1.1437007527958985e-05, - "loss": 1.1368, - "step": 1827 - }, - { - "epoch": 1.3835383159886472, - "grad_norm": 2.279019355773926, - "learning_rate": 1.1429062484339175e-05, - "loss": 1.1293, - "step": 1828 - }, - { - "epoch": 1.3842951750236518, - "grad_norm": 2.179516315460205, - "learning_rate": 1.1421116520212066e-05, - "loss": 1.1538, - "step": 1829 - }, - { - "epoch": 1.3850520340586565, - "grad_norm": 2.0977933406829834, - "learning_rate": 1.1413169640699505e-05, - "loss": 1.1259, - "step": 1830 - }, - { - "epoch": 1.3858088930936612, - "grad_norm": 2.1527068614959717, - "learning_rate": 1.1405221850923932e-05, - "loss": 1.0934, - "step": 1831 - }, - { - "epoch": 1.386565752128666, - "grad_norm": 2.2525691986083984, - "learning_rate": 1.1397273156008364e-05, - "loss": 1.2084, - "step": 1832 - }, - { - "epoch": 1.3873226111636707, - "grad_norm": 2.0335781574249268, - "learning_rate": 1.1389323561076419e-05, - "loss": 1.1224, - "step": 1833 - }, - { - "epoch": 1.3880794701986754, - "grad_norm": 2.142940044403076, - "learning_rate": 1.1381373071252273e-05, - "loss": 1.0934, - "step": 1834 - }, - { - "epoch": 1.3888363292336803, - "grad_norm": 2.2513163089752197, - "learning_rate": 1.1373421691660697e-05, - "loss": 1.161, - "step": 1835 - }, - { - "epoch": 1.389593188268685, - "grad_norm": 2.1784231662750244, - "learning_rate": 1.1365469427427037e-05, - "loss": 1.127, - "step": 1836 - }, - { - "epoch": 1.3903500473036896, - "grad_norm": 2.2235348224639893, - "learning_rate": 1.1357516283677185e-05, - "loss": 1.1595, - "step": 1837 - }, - { - "epoch": 1.3911069063386945, - "grad_norm": 2.08614182472229, - "learning_rate": 1.1349562265537626e-05, - "loss": 1.1083, - "step": 1838 - }, - { - "epoch": 1.3918637653736992, - "grad_norm": 1.9127520322799683, - "learning_rate": 1.1341607378135395e-05, - "loss": 1.1516, - "step": 1839 - }, - { - "epoch": 1.3926206244087038, - "grad_norm": 2.100748300552368, - "learning_rate": 1.1333651626598095e-05, - "loss": 1.1306, - "step": 1840 - }, - { - "epoch": 1.3933774834437087, - "grad_norm": 2.3174188137054443, - "learning_rate": 1.1325695016053878e-05, - "loss": 1.1893, - "step": 1841 - }, - { - "epoch": 1.3941343424787134, - "grad_norm": 2.4146411418914795, - "learning_rate": 1.1317737551631455e-05, - "loss": 1.1463, - "step": 1842 - }, - { - "epoch": 1.394891201513718, - "grad_norm": 2.604128837585449, - "learning_rate": 1.130977923846009e-05, - "loss": 1.131, - "step": 1843 - }, - { - "epoch": 1.395648060548723, - "grad_norm": 2.1692941188812256, - "learning_rate": 1.1301820081669586e-05, - "loss": 1.1504, - "step": 1844 - }, - { - "epoch": 1.3964049195837276, - "grad_norm": 1.9960031509399414, - "learning_rate": 1.1293860086390294e-05, - "loss": 1.133, - "step": 1845 - }, - { - "epoch": 1.3971617786187323, - "grad_norm": 2.2130203247070312, - "learning_rate": 1.1285899257753105e-05, - "loss": 1.1375, - "step": 1846 - }, - { - "epoch": 1.397918637653737, - "grad_norm": 2.1830358505249023, - "learning_rate": 1.1277937600889458e-05, - "loss": 1.1391, - "step": 1847 - }, - { - "epoch": 1.3986754966887418, - "grad_norm": 2.188948392868042, - "learning_rate": 1.1269975120931301e-05, - "loss": 1.1241, - "step": 1848 - }, - { - "epoch": 1.3994323557237465, - "grad_norm": 2.3037242889404297, - 
"learning_rate": 1.1262011823011132e-05, - "loss": 1.1221, - "step": 1849 - }, - { - "epoch": 1.4001892147587511, - "grad_norm": 2.0598981380462646, - "learning_rate": 1.1254047712261975e-05, - "loss": 1.108, - "step": 1850 - }, - { - "epoch": 1.4009460737937558, - "grad_norm": 2.116628885269165, - "learning_rate": 1.1246082793817372e-05, - "loss": 1.1166, - "step": 1851 - }, - { - "epoch": 1.4017029328287607, - "grad_norm": 2.09624981880188, - "learning_rate": 1.1238117072811389e-05, - "loss": 1.1216, - "step": 1852 - }, - { - "epoch": 1.4024597918637653, - "grad_norm": 2.092494487762451, - "learning_rate": 1.1230150554378606e-05, - "loss": 1.1447, - "step": 1853 - }, - { - "epoch": 1.40321665089877, - "grad_norm": 1.9815505743026733, - "learning_rate": 1.1222183243654119e-05, - "loss": 1.1939, - "step": 1854 - }, - { - "epoch": 1.403973509933775, - "grad_norm": 2.285538673400879, - "learning_rate": 1.121421514577354e-05, - "loss": 1.1981, - "step": 1855 - }, - { - "epoch": 1.4047303689687796, - "grad_norm": 2.4579432010650635, - "learning_rate": 1.1206246265872975e-05, - "loss": 1.1246, - "step": 1856 - }, - { - "epoch": 1.4054872280037842, - "grad_norm": 2.2195796966552734, - "learning_rate": 1.1198276609089051e-05, - "loss": 1.0943, - "step": 1857 - }, - { - "epoch": 1.4062440870387891, - "grad_norm": 2.3332061767578125, - "learning_rate": 1.1190306180558886e-05, - "loss": 1.1896, - "step": 1858 - }, - { - "epoch": 1.4070009460737938, - "grad_norm": 2.257955551147461, - "learning_rate": 1.1182334985420088e-05, - "loss": 1.1565, - "step": 1859 - }, - { - "epoch": 1.4077578051087984, - "grad_norm": 2.1527364253997803, - "learning_rate": 1.1174363028810782e-05, - "loss": 1.1269, - "step": 1860 - }, - { - "epoch": 1.4085146641438033, - "grad_norm": 2.168989896774292, - "learning_rate": 1.1166390315869555e-05, - "loss": 1.118, - "step": 1861 - }, - { - "epoch": 1.409271523178808, - "grad_norm": 2.1610758304595947, - "learning_rate": 1.1158416851735505e-05, - "loss": 1.1126, - "step": 1862 - }, - { - "epoch": 1.4100283822138127, - "grad_norm": 2.423572063446045, - "learning_rate": 1.1150442641548205e-05, - "loss": 1.1681, - "step": 1863 - }, - { - "epoch": 1.4107852412488175, - "grad_norm": 2.1142797470092773, - "learning_rate": 1.1142467690447708e-05, - "loss": 1.1159, - "step": 1864 - }, - { - "epoch": 1.4115421002838222, - "grad_norm": 2.206160068511963, - "learning_rate": 1.1134492003574541e-05, - "loss": 1.1007, - "step": 1865 - }, - { - "epoch": 1.4122989593188269, - "grad_norm": 2.223226547241211, - "learning_rate": 1.1126515586069716e-05, - "loss": 1.0648, - "step": 1866 - }, - { - "epoch": 1.4130558183538315, - "grad_norm": 2.488703966140747, - "learning_rate": 1.1118538443074713e-05, - "loss": 1.107, - "step": 1867 - }, - { - "epoch": 1.4138126773888362, - "grad_norm": 2.2958545684814453, - "learning_rate": 1.1110560579731469e-05, - "loss": 1.1553, - "step": 1868 - }, - { - "epoch": 1.414569536423841, - "grad_norm": 2.343440055847168, - "learning_rate": 1.1102582001182399e-05, - "loss": 1.1225, - "step": 1869 - }, - { - "epoch": 1.4153263954588458, - "grad_norm": 2.963460922241211, - "learning_rate": 1.1094602712570366e-05, - "loss": 1.1211, - "step": 1870 - }, - { - "epoch": 1.4160832544938504, - "grad_norm": 2.123777151107788, - "learning_rate": 1.1086622719038708e-05, - "loss": 1.0919, - "step": 1871 - }, - { - "epoch": 1.4168401135288553, - "grad_norm": 2.1496341228485107, - "learning_rate": 1.1078642025731197e-05, - "loss": 1.0807, - "step": 1872 - }, - { - "epoch": 
1.41759697256386, - "grad_norm": 2.147340774536133, - "learning_rate": 1.107066063779207e-05, - "loss": 1.1372, - "step": 1873 - }, - { - "epoch": 1.4183538315988646, - "grad_norm": 2.05765438079834, - "learning_rate": 1.1062678560366013e-05, - "loss": 1.1531, - "step": 1874 - }, - { - "epoch": 1.4191106906338695, - "grad_norm": 2.409080982208252, - "learning_rate": 1.1054695798598142e-05, - "loss": 1.1531, - "step": 1875 - }, - { - "epoch": 1.4198675496688742, - "grad_norm": 1.9932847023010254, - "learning_rate": 1.104671235763403e-05, - "loss": 1.1766, - "step": 1876 - }, - { - "epoch": 1.4206244087038788, - "grad_norm": 2.2019896507263184, - "learning_rate": 1.1038728242619686e-05, - "loss": 1.1037, - "step": 1877 - }, - { - "epoch": 1.4213812677388837, - "grad_norm": 2.263040065765381, - "learning_rate": 1.1030743458701533e-05, - "loss": 1.1475, - "step": 1878 - }, - { - "epoch": 1.4221381267738884, - "grad_norm": 2.0611464977264404, - "learning_rate": 1.1022758011026455e-05, - "loss": 1.1652, - "step": 1879 - }, - { - "epoch": 1.422894985808893, - "grad_norm": 2.175058364868164, - "learning_rate": 1.1014771904741746e-05, - "loss": 1.1164, - "step": 1880 - }, - { - "epoch": 1.423651844843898, - "grad_norm": 2.152921676635742, - "learning_rate": 1.1006785144995127e-05, - "loss": 1.1327, - "step": 1881 - }, - { - "epoch": 1.4244087038789026, - "grad_norm": 2.4787025451660156, - "learning_rate": 1.0998797736934743e-05, - "loss": 1.1633, - "step": 1882 - }, - { - "epoch": 1.4251655629139073, - "grad_norm": 2.7934088706970215, - "learning_rate": 1.0990809685709149e-05, - "loss": 1.1831, - "step": 1883 - }, - { - "epoch": 1.425922421948912, - "grad_norm": 2.058727502822876, - "learning_rate": 1.0982820996467334e-05, - "loss": 1.1262, - "step": 1884 - }, - { - "epoch": 1.4266792809839166, - "grad_norm": 2.1673519611358643, - "learning_rate": 1.0974831674358674e-05, - "loss": 1.1478, - "step": 1885 - }, - { - "epoch": 1.4274361400189215, - "grad_norm": 2.481576442718506, - "learning_rate": 1.0966841724532966e-05, - "loss": 1.1166, - "step": 1886 - }, - { - "epoch": 1.4281929990539262, - "grad_norm": 2.131117820739746, - "learning_rate": 1.0958851152140413e-05, - "loss": 1.084, - "step": 1887 - }, - { - "epoch": 1.4289498580889308, - "grad_norm": 2.3017077445983887, - "learning_rate": 1.095085996233162e-05, - "loss": 1.1406, - "step": 1888 - }, - { - "epoch": 1.4297067171239357, - "grad_norm": 2.4619855880737305, - "learning_rate": 1.0942868160257574e-05, - "loss": 1.1287, - "step": 1889 - }, - { - "epoch": 1.4304635761589404, - "grad_norm": 2.3940885066986084, - "learning_rate": 1.0934875751069679e-05, - "loss": 1.1135, - "step": 1890 - }, - { - "epoch": 1.431220435193945, - "grad_norm": 2.3564260005950928, - "learning_rate": 1.0926882739919718e-05, - "loss": 1.1474, - "step": 1891 - }, - { - "epoch": 1.43197729422895, - "grad_norm": 2.1630711555480957, - "learning_rate": 1.091888913195986e-05, - "loss": 1.1622, - "step": 1892 - }, - { - "epoch": 1.4327341532639546, - "grad_norm": 2.173370838165283, - "learning_rate": 1.0910894932342666e-05, - "loss": 1.1384, - "step": 1893 - }, - { - "epoch": 1.4334910122989593, - "grad_norm": 2.09320068359375, - "learning_rate": 1.0902900146221075e-05, - "loss": 1.1625, - "step": 1894 - }, - { - "epoch": 1.4342478713339641, - "grad_norm": 2.508751630783081, - "learning_rate": 1.0894904778748406e-05, - "loss": 1.1457, - "step": 1895 - }, - { - "epoch": 1.4350047303689688, - "grad_norm": 2.234450578689575, - "learning_rate": 1.0886908835078349e-05, - "loss": 
1.1785, - "step": 1896 - }, - { - "epoch": 1.4357615894039735, - "grad_norm": 2.431640148162842, - "learning_rate": 1.0878912320364962e-05, - "loss": 1.1465, - "step": 1897 - }, - { - "epoch": 1.4365184484389784, - "grad_norm": 2.068406581878662, - "learning_rate": 1.087091523976269e-05, - "loss": 1.1386, - "step": 1898 - }, - { - "epoch": 1.437275307473983, - "grad_norm": 2.1216137409210205, - "learning_rate": 1.0862917598426315e-05, - "loss": 1.1177, - "step": 1899 - }, - { - "epoch": 1.4380321665089877, - "grad_norm": 2.36860990524292, - "learning_rate": 1.0854919401511002e-05, - "loss": 1.1612, - "step": 1900 - }, - { - "epoch": 1.4387890255439924, - "grad_norm": 2.2473835945129395, - "learning_rate": 1.0846920654172264e-05, - "loss": 1.1123, - "step": 1901 - }, - { - "epoch": 1.4395458845789972, - "grad_norm": 2.1833431720733643, - "learning_rate": 1.0838921361565978e-05, - "loss": 1.133, - "step": 1902 - }, - { - "epoch": 1.440302743614002, - "grad_norm": 2.078742742538452, - "learning_rate": 1.0830921528848355e-05, - "loss": 1.1634, - "step": 1903 - }, - { - "epoch": 1.4410596026490066, - "grad_norm": 2.1691906452178955, - "learning_rate": 1.0822921161175974e-05, - "loss": 1.1557, - "step": 1904 - }, - { - "epoch": 1.4418164616840112, - "grad_norm": 2.200441598892212, - "learning_rate": 1.0814920263705746e-05, - "loss": 1.1438, - "step": 1905 - }, - { - "epoch": 1.4425733207190161, - "grad_norm": 2.1800084114074707, - "learning_rate": 1.0806918841594929e-05, - "loss": 1.1395, - "step": 1906 - }, - { - "epoch": 1.4433301797540208, - "grad_norm": 2.34407901763916, - "learning_rate": 1.0798916900001117e-05, - "loss": 1.1448, - "step": 1907 - }, - { - "epoch": 1.4440870387890254, - "grad_norm": 2.1149091720581055, - "learning_rate": 1.0790914444082244e-05, - "loss": 1.1664, - "step": 1908 - }, - { - "epoch": 1.4448438978240303, - "grad_norm": 2.3421874046325684, - "learning_rate": 1.0782911478996559e-05, - "loss": 1.1109, - "step": 1909 - }, - { - "epoch": 1.445600756859035, - "grad_norm": 2.139888286590576, - "learning_rate": 1.0774908009902663e-05, - "loss": 1.1852, - "step": 1910 - }, - { - "epoch": 1.4463576158940397, - "grad_norm": 2.1743266582489014, - "learning_rate": 1.0766904041959465e-05, - "loss": 1.0994, - "step": 1911 - }, - { - "epoch": 1.4471144749290445, - "grad_norm": 2.0239481925964355, - "learning_rate": 1.0758899580326203e-05, - "loss": 1.0712, - "step": 1912 - }, - { - "epoch": 1.4478713339640492, - "grad_norm": 2.5557572841644287, - "learning_rate": 1.0750894630162429e-05, - "loss": 1.0855, - "step": 1913 - }, - { - "epoch": 1.4486281929990539, - "grad_norm": 2.1770548820495605, - "learning_rate": 1.0742889196628014e-05, - "loss": 1.1541, - "step": 1914 - }, - { - "epoch": 1.4493850520340588, - "grad_norm": 2.065044641494751, - "learning_rate": 1.073488328488314e-05, - "loss": 1.0722, - "step": 1915 - }, - { - "epoch": 1.4501419110690634, - "grad_norm": 2.274731159210205, - "learning_rate": 1.0726876900088287e-05, - "loss": 1.1562, - "step": 1916 - }, - { - "epoch": 1.450898770104068, - "grad_norm": 2.2915658950805664, - "learning_rate": 1.0718870047404253e-05, - "loss": 1.1573, - "step": 1917 - }, - { - "epoch": 1.451655629139073, - "grad_norm": 2.391997814178467, - "learning_rate": 1.0710862731992138e-05, - "loss": 1.1434, - "step": 1918 - }, - { - "epoch": 1.4524124881740776, - "grad_norm": 2.3179776668548584, - "learning_rate": 1.0702854959013332e-05, - "loss": 1.1352, - "step": 1919 - }, - { - "epoch": 1.4531693472090823, - "grad_norm": 2.223360776901245, - 
"learning_rate": 1.0694846733629519e-05, - "loss": 1.1152, - "step": 1920 - }, - { - "epoch": 1.453926206244087, - "grad_norm": 2.222038984298706, - "learning_rate": 1.0686838061002684e-05, - "loss": 1.0796, - "step": 1921 - }, - { - "epoch": 1.4546830652790916, - "grad_norm": 2.1372921466827393, - "learning_rate": 1.0678828946295099e-05, - "loss": 1.1047, - "step": 1922 - }, - { - "epoch": 1.4554399243140965, - "grad_norm": 2.1707942485809326, - "learning_rate": 1.0670819394669308e-05, - "loss": 1.1509, - "step": 1923 - }, - { - "epoch": 1.4561967833491012, - "grad_norm": 2.112736463546753, - "learning_rate": 1.066280941128815e-05, - "loss": 1.1266, - "step": 1924 - }, - { - "epoch": 1.4569536423841059, - "grad_norm": 2.3490540981292725, - "learning_rate": 1.065479900131474e-05, - "loss": 1.1621, - "step": 1925 - }, - { - "epoch": 1.4577105014191107, - "grad_norm": 2.17901873588562, - "learning_rate": 1.0646788169912465e-05, - "loss": 1.0735, - "step": 1926 - }, - { - "epoch": 1.4584673604541154, - "grad_norm": 2.0860230922698975, - "learning_rate": 1.0638776922244982e-05, - "loss": 1.1362, - "step": 1927 - }, - { - "epoch": 1.45922421948912, - "grad_norm": 2.0391974449157715, - "learning_rate": 1.0630765263476221e-05, - "loss": 1.1316, - "step": 1928 - }, - { - "epoch": 1.459981078524125, - "grad_norm": 2.0687365531921387, - "learning_rate": 1.062275319877038e-05, - "loss": 1.128, - "step": 1929 - }, - { - "epoch": 1.4607379375591296, - "grad_norm": 2.0217580795288086, - "learning_rate": 1.0614740733291902e-05, - "loss": 1.1377, - "step": 1930 - }, - { - "epoch": 1.4614947965941343, - "grad_norm": 2.0296125411987305, - "learning_rate": 1.060672787220551e-05, - "loss": 1.1236, - "step": 1931 - }, - { - "epoch": 1.4622516556291392, - "grad_norm": 2.0273191928863525, - "learning_rate": 1.0598714620676171e-05, - "loss": 1.1271, - "step": 1932 - }, - { - "epoch": 1.4630085146641438, - "grad_norm": 2.011613130569458, - "learning_rate": 1.05907009838691e-05, - "loss": 1.1457, - "step": 1933 - }, - { - "epoch": 1.4637653736991485, - "grad_norm": 2.0048105716705322, - "learning_rate": 1.058268696694977e-05, - "loss": 1.1499, - "step": 1934 - }, - { - "epoch": 1.4645222327341534, - "grad_norm": 2.086610794067383, - "learning_rate": 1.0574672575083891e-05, - "loss": 1.1376, - "step": 1935 - }, - { - "epoch": 1.465279091769158, - "grad_norm": 2.2125232219696045, - "learning_rate": 1.0566657813437419e-05, - "loss": 1.1103, - "step": 1936 - }, - { - "epoch": 1.4660359508041627, - "grad_norm": 2.172860622406006, - "learning_rate": 1.0558642687176548e-05, - "loss": 1.1306, - "step": 1937 - }, - { - "epoch": 1.4667928098391674, - "grad_norm": 2.1361825466156006, - "learning_rate": 1.0550627201467702e-05, - "loss": 1.0978, - "step": 1938 - }, - { - "epoch": 1.4675496688741723, - "grad_norm": 2.0148260593414307, - "learning_rate": 1.0542611361477548e-05, - "loss": 1.0851, - "step": 1939 - }, - { - "epoch": 1.468306527909177, - "grad_norm": 2.103895664215088, - "learning_rate": 1.0534595172372967e-05, - "loss": 1.1197, - "step": 1940 - }, - { - "epoch": 1.4690633869441816, - "grad_norm": 2.1808462142944336, - "learning_rate": 1.0526578639321078e-05, - "loss": 1.1192, - "step": 1941 - }, - { - "epoch": 1.4698202459791863, - "grad_norm": 2.2360849380493164, - "learning_rate": 1.0518561767489211e-05, - "loss": 1.1142, - "step": 1942 - }, - { - "epoch": 1.4705771050141911, - "grad_norm": 2.243360996246338, - "learning_rate": 1.0510544562044925e-05, - "loss": 1.2133, - "step": 1943 - }, - { - "epoch": 
1.4713339640491958, - "grad_norm": 2.07759690284729, - "learning_rate": 1.050252702815598e-05, - "loss": 1.1227, - "step": 1944 - }, - { - "epoch": 1.4720908230842005, - "grad_norm": 2.0380797386169434, - "learning_rate": 1.0494509170990362e-05, - "loss": 1.0894, - "step": 1945 - }, - { - "epoch": 1.4728476821192054, - "grad_norm": 2.184549570083618, - "learning_rate": 1.0486490995716264e-05, - "loss": 1.1314, - "step": 1946 - }, - { - "epoch": 1.47360454115421, - "grad_norm": 2.1510207653045654, - "learning_rate": 1.0478472507502069e-05, - "loss": 1.1688, - "step": 1947 - }, - { - "epoch": 1.4743614001892147, - "grad_norm": 2.1699905395507812, - "learning_rate": 1.0470453711516377e-05, - "loss": 1.1374, - "step": 1948 - }, - { - "epoch": 1.4751182592242196, - "grad_norm": 2.0163750648498535, - "learning_rate": 1.0462434612927984e-05, - "loss": 1.1469, - "step": 1949 - }, - { - "epoch": 1.4758751182592242, - "grad_norm": 2.176668882369995, - "learning_rate": 1.0454415216905875e-05, - "loss": 1.154, - "step": 1950 - }, - { - "epoch": 1.476631977294229, - "grad_norm": 2.18507981300354, - "learning_rate": 1.0446395528619236e-05, - "loss": 1.1175, - "step": 1951 - }, - { - "epoch": 1.4773888363292338, - "grad_norm": 2.033001661300659, - "learning_rate": 1.0438375553237428e-05, - "loss": 1.1129, - "step": 1952 - }, - { - "epoch": 1.4781456953642385, - "grad_norm": 2.0419886112213135, - "learning_rate": 1.0430355295930008e-05, - "loss": 1.1455, - "step": 1953 - }, - { - "epoch": 1.4789025543992431, - "grad_norm": 2.083308696746826, - "learning_rate": 1.0422334761866715e-05, - "loss": 1.1069, - "step": 1954 - }, - { - "epoch": 1.479659413434248, - "grad_norm": 2.0463309288024902, - "learning_rate": 1.0414313956217456e-05, - "loss": 1.1456, - "step": 1955 - }, - { - "epoch": 1.4804162724692527, - "grad_norm": 2.0065863132476807, - "learning_rate": 1.0406292884152327e-05, - "loss": 1.0829, - "step": 1956 - }, - { - "epoch": 1.4811731315042573, - "grad_norm": 1.8798035383224487, - "learning_rate": 1.0398271550841586e-05, - "loss": 1.1378, - "step": 1957 - }, - { - "epoch": 1.481929990539262, - "grad_norm": 2.483062267303467, - "learning_rate": 1.0390249961455658e-05, - "loss": 1.0775, - "step": 1958 - }, - { - "epoch": 1.4826868495742667, - "grad_norm": 1.995613694190979, - "learning_rate": 1.0382228121165146e-05, - "loss": 1.1936, - "step": 1959 - }, - { - "epoch": 1.4834437086092715, - "grad_norm": 2.1545281410217285, - "learning_rate": 1.03742060351408e-05, - "loss": 1.1802, - "step": 1960 - }, - { - "epoch": 1.4842005676442762, - "grad_norm": 2.1138501167297363, - "learning_rate": 1.0366183708553532e-05, - "loss": 1.102, - "step": 1961 - }, - { - "epoch": 1.4849574266792809, - "grad_norm": 2.1736159324645996, - "learning_rate": 1.0358161146574417e-05, - "loss": 1.1844, - "step": 1962 - }, - { - "epoch": 1.4857142857142858, - "grad_norm": 2.0476620197296143, - "learning_rate": 1.0350138354374675e-05, - "loss": 1.1117, - "step": 1963 - }, - { - "epoch": 1.4864711447492904, - "grad_norm": 2.070690631866455, - "learning_rate": 1.034211533712567e-05, - "loss": 1.0858, - "step": 1964 - }, - { - "epoch": 1.487228003784295, - "grad_norm": 2.256793975830078, - "learning_rate": 1.0334092099998926e-05, - "loss": 1.1564, - "step": 1965 - }, - { - "epoch": 1.4879848628193, - "grad_norm": 1.8769042491912842, - "learning_rate": 1.0326068648166088e-05, - "loss": 1.1211, - "step": 1966 - }, - { - "epoch": 1.4887417218543046, - "grad_norm": 2.271409749984741, - "learning_rate": 1.0318044986798961e-05, - "loss": 
1.1329, - "step": 1967 - }, - { - "epoch": 1.4894985808893093, - "grad_norm": 2.035731315612793, - "learning_rate": 1.031002112106947e-05, - "loss": 1.0566, - "step": 1968 - }, - { - "epoch": 1.4902554399243142, - "grad_norm": 1.9846116304397583, - "learning_rate": 1.0301997056149678e-05, - "loss": 1.1373, - "step": 1969 - }, - { - "epoch": 1.4910122989593189, - "grad_norm": 2.304295301437378, - "learning_rate": 1.0293972797211774e-05, - "loss": 1.1098, - "step": 1970 - }, - { - "epoch": 1.4917691579943235, - "grad_norm": 2.189412832260132, - "learning_rate": 1.028594834942807e-05, - "loss": 1.1026, - "step": 1971 - }, - { - "epoch": 1.4925260170293284, - "grad_norm": 2.1527864933013916, - "learning_rate": 1.0277923717971006e-05, - "loss": 1.1262, - "step": 1972 - }, - { - "epoch": 1.493282876064333, - "grad_norm": 2.20159912109375, - "learning_rate": 1.026989890801314e-05, - "loss": 1.092, - "step": 1973 - }, - { - "epoch": 1.4940397350993377, - "grad_norm": 2.2014966011047363, - "learning_rate": 1.0261873924727138e-05, - "loss": 1.1267, - "step": 1974 - }, - { - "epoch": 1.4947965941343424, - "grad_norm": 2.194817304611206, - "learning_rate": 1.0253848773285778e-05, - "loss": 1.1565, - "step": 1975 - }, - { - "epoch": 1.4955534531693473, - "grad_norm": 2.061915636062622, - "learning_rate": 1.0245823458861958e-05, - "loss": 1.1291, - "step": 1976 - }, - { - "epoch": 1.496310312204352, - "grad_norm": 2.1642725467681885, - "learning_rate": 1.0237797986628672e-05, - "loss": 1.1161, - "step": 1977 - }, - { - "epoch": 1.4970671712393566, - "grad_norm": 2.0526773929595947, - "learning_rate": 1.022977236175901e-05, - "loss": 1.1583, - "step": 1978 - }, - { - "epoch": 1.4978240302743613, - "grad_norm": 2.0349247455596924, - "learning_rate": 1.0221746589426176e-05, - "loss": 1.1161, - "step": 1979 - }, - { - "epoch": 1.4985808893093662, - "grad_norm": 2.1763689517974854, - "learning_rate": 1.0213720674803458e-05, - "loss": 1.1344, - "step": 1980 - }, - { - "epoch": 1.4993377483443708, - "grad_norm": 2.139963150024414, - "learning_rate": 1.0205694623064236e-05, - "loss": 1.2086, - "step": 1981 - }, - { - "epoch": 1.5000946073793755, - "grad_norm": 2.633737564086914, - "learning_rate": 1.0197668439381978e-05, - "loss": 1.1523, - "step": 1982 - }, - { - "epoch": 1.5008514664143804, - "grad_norm": 2.0594277381896973, - "learning_rate": 1.0189642128930246e-05, - "loss": 1.1436, - "step": 1983 - }, - { - "epoch": 1.501608325449385, - "grad_norm": 2.1511809825897217, - "learning_rate": 1.0181615696882676e-05, - "loss": 1.1195, - "step": 1984 - }, - { - "epoch": 1.5023651844843897, - "grad_norm": 1.992146372795105, - "learning_rate": 1.0173589148412981e-05, - "loss": 1.1534, - "step": 1985 - }, - { - "epoch": 1.5031220435193946, - "grad_norm": 2.000650644302368, - "learning_rate": 1.0165562488694953e-05, - "loss": 1.1158, - "step": 1986 - }, - { - "epoch": 1.5038789025543993, - "grad_norm": 2.0944910049438477, - "learning_rate": 1.0157535722902456e-05, - "loss": 1.0991, - "step": 1987 - }, - { - "epoch": 1.504635761589404, - "grad_norm": 2.3380539417266846, - "learning_rate": 1.0149508856209416e-05, - "loss": 1.148, - "step": 1988 - }, - { - "epoch": 1.5053926206244088, - "grad_norm": 2.1337814331054688, - "learning_rate": 1.014148189378983e-05, - "loss": 1.1508, - "step": 1989 - }, - { - "epoch": 1.5061494796594135, - "grad_norm": 2.069946765899658, - "learning_rate": 1.0133454840817765e-05, - "loss": 1.1449, - "step": 1990 - }, - { - "epoch": 1.5069063386944181, - "grad_norm": 2.4194324016571045, - 
"learning_rate": 1.0125427702467327e-05, - "loss": 1.1833, - "step": 1991 - }, - { - "epoch": 1.507663197729423, - "grad_norm": 2.0037777423858643, - "learning_rate": 1.0117400483912687e-05, - "loss": 1.1053, - "step": 1992 - }, - { - "epoch": 1.5084200567644275, - "grad_norm": 1.9638372659683228, - "learning_rate": 1.010937319032807e-05, - "loss": 1.1249, - "step": 1993 - }, - { - "epoch": 1.5091769157994324, - "grad_norm": 2.185102939605713, - "learning_rate": 1.0101345826887752e-05, - "loss": 1.1369, - "step": 1994 - }, - { - "epoch": 1.5099337748344372, - "grad_norm": 2.193578004837036, - "learning_rate": 1.0093318398766042e-05, - "loss": 1.1268, - "step": 1995 - }, - { - "epoch": 1.5106906338694417, - "grad_norm": 2.1746068000793457, - "learning_rate": 1.0085290911137298e-05, - "loss": 1.1316, - "step": 1996 - }, - { - "epoch": 1.5114474929044466, - "grad_norm": 2.308969736099243, - "learning_rate": 1.0077263369175918e-05, - "loss": 1.11, - "step": 1997 - }, - { - "epoch": 1.5122043519394512, - "grad_norm": 2.2050511837005615, - "learning_rate": 1.0069235778056336e-05, - "loss": 1.1363, - "step": 1998 - }, - { - "epoch": 1.512961210974456, - "grad_norm": 2.351792812347412, - "learning_rate": 1.0061208142953012e-05, - "loss": 1.1222, - "step": 1999 - }, - { - "epoch": 1.5137180700094608, - "grad_norm": 2.144644021987915, - "learning_rate": 1.0053180469040433e-05, - "loss": 1.0997, - "step": 2000 - }, - { - "epoch": 1.5144749290444655, - "grad_norm": 2.1637988090515137, - "learning_rate": 1.0045152761493127e-05, - "loss": 1.0968, - "step": 2001 - }, - { - "epoch": 1.5152317880794701, - "grad_norm": 2.200721502304077, - "learning_rate": 1.0037125025485616e-05, - "loss": 1.1016, - "step": 2002 - }, - { - "epoch": 1.515988647114475, - "grad_norm": 2.3035366535186768, - "learning_rate": 1.0029097266192467e-05, - "loss": 1.1659, - "step": 2003 - }, - { - "epoch": 1.5167455061494797, - "grad_norm": 2.34773588180542, - "learning_rate": 1.0021069488788253e-05, - "loss": 1.0888, - "step": 2004 - }, - { - "epoch": 1.5175023651844843, - "grad_norm": 2.268134117126465, - "learning_rate": 1.0013041698447547e-05, - "loss": 1.1519, - "step": 2005 - }, - { - "epoch": 1.5182592242194892, - "grad_norm": 2.331434726715088, - "learning_rate": 1.000501390034495e-05, - "loss": 1.1335, - "step": 2006 - }, - { - "epoch": 1.5190160832544939, - "grad_norm": 2.3400261402130127, - "learning_rate": 9.996986099655052e-06, - "loss": 1.1808, - "step": 2007 - }, - { - "epoch": 1.5197729422894986, - "grad_norm": 2.348576068878174, - "learning_rate": 9.988958301552454e-06, - "loss": 1.1358, - "step": 2008 - }, - { - "epoch": 1.5205298013245034, - "grad_norm": 2.131770610809326, - "learning_rate": 9.980930511211751e-06, - "loss": 1.0952, - "step": 2009 - }, - { - "epoch": 1.5212866603595079, - "grad_norm": 2.3337466716766357, - "learning_rate": 9.972902733807532e-06, - "loss": 1.1449, - "step": 2010 - }, - { - "epoch": 1.5220435193945128, - "grad_norm": 2.0936343669891357, - "learning_rate": 9.964874974514386e-06, - "loss": 1.1176, - "step": 2011 - }, - { - "epoch": 1.5228003784295177, - "grad_norm": 2.0963313579559326, - "learning_rate": 9.95684723850688e-06, - "loss": 1.1481, - "step": 2012 - }, - { - "epoch": 1.523557237464522, - "grad_norm": 2.055452823638916, - "learning_rate": 9.948819530959566e-06, - "loss": 1.1008, - "step": 2013 - }, - { - "epoch": 1.524314096499527, - "grad_norm": 2.257266044616699, - "learning_rate": 9.94079185704699e-06, - "loss": 1.1544, - "step": 2014 - }, - { - "epoch": 1.5250709555345316, 
- "grad_norm": 2.06075119972229, - "learning_rate": 9.932764221943666e-06, - "loss": 1.1153, - "step": 2015 - }, - { - "epoch": 1.5258278145695363, - "grad_norm": 2.2544174194335938, - "learning_rate": 9.924736630824083e-06, - "loss": 1.0718, - "step": 2016 - }, - { - "epoch": 1.5265846736045412, - "grad_norm": 2.1009559631347656, - "learning_rate": 9.916709088862707e-06, - "loss": 1.137, - "step": 2017 - }, - { - "epoch": 1.5273415326395459, - "grad_norm": 2.014848470687866, - "learning_rate": 9.908681601233964e-06, - "loss": 1.1268, - "step": 2018 - }, - { - "epoch": 1.5280983916745505, - "grad_norm": 2.2673892974853516, - "learning_rate": 9.900654173112251e-06, - "loss": 1.1528, - "step": 2019 - }, - { - "epoch": 1.5288552507095554, - "grad_norm": 2.206071138381958, - "learning_rate": 9.89262680967193e-06, - "loss": 1.1013, - "step": 2020 - }, - { - "epoch": 1.52961210974456, - "grad_norm": 2.102032423019409, - "learning_rate": 9.884599516087314e-06, - "loss": 1.105, - "step": 2021 - }, - { - "epoch": 1.5303689687795647, - "grad_norm": 2.1362051963806152, - "learning_rate": 9.876572297532677e-06, - "loss": 1.1479, - "step": 2022 - }, - { - "epoch": 1.5311258278145696, - "grad_norm": 2.0666024684906006, - "learning_rate": 9.868545159182238e-06, - "loss": 1.1257, - "step": 2023 - }, - { - "epoch": 1.5318826868495743, - "grad_norm": 1.9618515968322754, - "learning_rate": 9.860518106210167e-06, - "loss": 1.0649, - "step": 2024 - }, - { - "epoch": 1.532639545884579, - "grad_norm": 2.202753782272339, - "learning_rate": 9.852491143790587e-06, - "loss": 1.1016, - "step": 2025 - }, - { - "epoch": 1.5333964049195838, - "grad_norm": 1.9656624794006348, - "learning_rate": 9.844464277097549e-06, - "loss": 1.1435, - "step": 2026 - }, - { - "epoch": 1.5341532639545885, - "grad_norm": 2.06479811668396, - "learning_rate": 9.83643751130505e-06, - "loss": 1.1393, - "step": 2027 - }, - { - "epoch": 1.5349101229895932, - "grad_norm": 2.1722230911254883, - "learning_rate": 9.828410851587023e-06, - "loss": 1.1587, - "step": 2028 - }, - { - "epoch": 1.535666982024598, - "grad_norm": 2.090200901031494, - "learning_rate": 9.820384303117328e-06, - "loss": 1.109, - "step": 2029 - }, - { - "epoch": 1.5364238410596025, - "grad_norm": 1.9950278997421265, - "learning_rate": 9.812357871069754e-06, - "loss": 1.0648, - "step": 2030 - }, - { - "epoch": 1.5371807000946074, - "grad_norm": 2.141153573989868, - "learning_rate": 9.804331560618023e-06, - "loss": 1.1327, - "step": 2031 - }, - { - "epoch": 1.5379375591296123, - "grad_norm": 1.9659839868545532, - "learning_rate": 9.79630537693577e-06, - "loss": 1.1194, - "step": 2032 - }, - { - "epoch": 1.5386944181646167, - "grad_norm": 2.187727212905884, - "learning_rate": 9.788279325196547e-06, - "loss": 1.1225, - "step": 2033 - }, - { - "epoch": 1.5394512771996216, - "grad_norm": 2.4570298194885254, - "learning_rate": 9.780253410573827e-06, - "loss": 1.1252, - "step": 2034 - }, - { - "epoch": 1.5402081362346263, - "grad_norm": 2.2557146549224854, - "learning_rate": 9.772227638240993e-06, - "loss": 1.1698, - "step": 2035 - }, - { - "epoch": 1.540964995269631, - "grad_norm": 2.384152889251709, - "learning_rate": 9.764202013371333e-06, - "loss": 1.1447, - "step": 2036 - }, - { - "epoch": 1.5417218543046358, - "grad_norm": 2.0934481620788574, - "learning_rate": 9.756176541138045e-06, - "loss": 1.1429, - "step": 2037 - }, - { - "epoch": 1.5424787133396405, - "grad_norm": 2.418853282928467, - "learning_rate": 9.748151226714222e-06, - "loss": 1.1321, - "step": 2038 - }, - { - 
"epoch": 1.5432355723746451, - "grad_norm": 2.0321297645568848, - "learning_rate": 9.740126075272868e-06, - "loss": 1.0983, - "step": 2039 - }, - { - "epoch": 1.54399243140965, - "grad_norm": 2.088118076324463, - "learning_rate": 9.732101091986864e-06, - "loss": 1.1566, - "step": 2040 - }, - { - "epoch": 1.5447492904446547, - "grad_norm": 2.135477066040039, - "learning_rate": 9.724076282028993e-06, - "loss": 1.0886, - "step": 2041 - }, - { - "epoch": 1.5455061494796594, - "grad_norm": 2.1262335777282715, - "learning_rate": 9.716051650571933e-06, - "loss": 1.1461, - "step": 2042 - }, - { - "epoch": 1.5462630085146643, - "grad_norm": 2.2694787979125977, - "learning_rate": 9.708027202788229e-06, - "loss": 1.1725, - "step": 2043 - }, - { - "epoch": 1.547019867549669, - "grad_norm": 2.0884077548980713, - "learning_rate": 9.700002943850323e-06, - "loss": 1.1098, - "step": 2044 - }, - { - "epoch": 1.5477767265846736, - "grad_norm": 2.5522899627685547, - "learning_rate": 9.691978878930532e-06, - "loss": 1.0861, - "step": 2045 - }, - { - "epoch": 1.5485335856196785, - "grad_norm": 2.099339008331299, - "learning_rate": 9.68395501320104e-06, - "loss": 1.135, - "step": 2046 - }, - { - "epoch": 1.549290444654683, - "grad_norm": 2.0966038703918457, - "learning_rate": 9.675931351833911e-06, - "loss": 1.1468, - "step": 2047 - }, - { - "epoch": 1.5500473036896878, - "grad_norm": 1.972170352935791, - "learning_rate": 9.667907900001079e-06, - "loss": 1.0958, - "step": 2048 - }, - { - "epoch": 1.5508041627246927, - "grad_norm": 2.228671073913574, - "learning_rate": 9.659884662874332e-06, - "loss": 1.1338, - "step": 2049 - }, - { - "epoch": 1.5515610217596971, - "grad_norm": 1.9483565092086792, - "learning_rate": 9.65186164562533e-06, - "loss": 1.0757, - "step": 2050 - }, - { - "epoch": 1.552317880794702, - "grad_norm": 2.0136473178863525, - "learning_rate": 9.643838853425586e-06, - "loss": 1.1177, - "step": 2051 - }, - { - "epoch": 1.5530747398297067, - "grad_norm": 2.0868184566497803, - "learning_rate": 9.635816291446469e-06, - "loss": 1.1814, - "step": 2052 - }, - { - "epoch": 1.5538315988647113, - "grad_norm": 1.9814927577972412, - "learning_rate": 9.627793964859205e-06, - "loss": 1.1342, - "step": 2053 - }, - { - "epoch": 1.5545884578997162, - "grad_norm": 2.0822665691375732, - "learning_rate": 9.619771878834858e-06, - "loss": 1.0962, - "step": 2054 - }, - { - "epoch": 1.555345316934721, - "grad_norm": 2.0414929389953613, - "learning_rate": 9.611750038544343e-06, - "loss": 1.1178, - "step": 2055 - }, - { - "epoch": 1.5561021759697256, - "grad_norm": 2.134589672088623, - "learning_rate": 9.60372844915842e-06, - "loss": 1.133, - "step": 2056 - }, - { - "epoch": 1.5568590350047304, - "grad_norm": 2.3856427669525146, - "learning_rate": 9.595707115847676e-06, - "loss": 1.1252, - "step": 2057 - }, - { - "epoch": 1.557615894039735, - "grad_norm": 2.275172710418701, - "learning_rate": 9.587686043782545e-06, - "loss": 1.1195, - "step": 2058 - }, - { - "epoch": 1.5583727530747398, - "grad_norm": 2.2985713481903076, - "learning_rate": 9.579665238133291e-06, - "loss": 1.148, - "step": 2059 - }, - { - "epoch": 1.5591296121097447, - "grad_norm": 2.0514907836914062, - "learning_rate": 9.571644704069995e-06, - "loss": 1.1307, - "step": 2060 - }, - { - "epoch": 1.5598864711447493, - "grad_norm": 2.2182204723358154, - "learning_rate": 9.563624446762576e-06, - "loss": 1.0913, - "step": 2061 - }, - { - "epoch": 1.560643330179754, - "grad_norm": 2.628448486328125, - "learning_rate": 9.555604471380767e-06, - "loss": 1.1385, - 
"step": 2062 - }, - { - "epoch": 1.5614001892147589, - "grad_norm": 1.9690390825271606, - "learning_rate": 9.547584783094126e-06, - "loss": 1.1314, - "step": 2063 - }, - { - "epoch": 1.5621570482497635, - "grad_norm": 2.0425853729248047, - "learning_rate": 9.539565387072019e-06, - "loss": 1.1048, - "step": 2064 - }, - { - "epoch": 1.5629139072847682, - "grad_norm": 2.025308609008789, - "learning_rate": 9.531546288483624e-06, - "loss": 1.1012, - "step": 2065 - }, - { - "epoch": 1.563670766319773, - "grad_norm": 2.17830491065979, - "learning_rate": 9.523527492497934e-06, - "loss": 1.1926, - "step": 2066 - }, - { - "epoch": 1.5644276253547775, - "grad_norm": 3.244462251663208, - "learning_rate": 9.51550900428374e-06, - "loss": 1.0953, - "step": 2067 - }, - { - "epoch": 1.5651844843897824, - "grad_norm": 2.0593700408935547, - "learning_rate": 9.507490829009639e-06, - "loss": 1.1161, - "step": 2068 - }, - { - "epoch": 1.565941343424787, - "grad_norm": 2.0308477878570557, - "learning_rate": 9.49947297184402e-06, - "loss": 1.0959, - "step": 2069 - }, - { - "epoch": 1.5666982024597917, - "grad_norm": 2.1143085956573486, - "learning_rate": 9.491455437955081e-06, - "loss": 1.1541, - "step": 2070 - }, - { - "epoch": 1.5674550614947966, - "grad_norm": 2.3376524448394775, - "learning_rate": 9.483438232510792e-06, - "loss": 1.1283, - "step": 2071 - }, - { - "epoch": 1.5682119205298013, - "grad_norm": 2.194188117980957, - "learning_rate": 9.475421360678926e-06, - "loss": 1.1256, - "step": 2072 - }, - { - "epoch": 1.568968779564806, - "grad_norm": 2.12689208984375, - "learning_rate": 9.467404827627036e-06, - "loss": 1.1638, - "step": 2073 - }, - { - "epoch": 1.5697256385998108, - "grad_norm": 1.9852758646011353, - "learning_rate": 9.459388638522455e-06, - "loss": 1.1458, - "step": 2074 - }, - { - "epoch": 1.5704824976348155, - "grad_norm": 1.958489179611206, - "learning_rate": 9.4513727985323e-06, - "loss": 1.1222, - "step": 2075 - }, - { - "epoch": 1.5712393566698202, - "grad_norm": 2.1876025199890137, - "learning_rate": 9.443357312823454e-06, - "loss": 1.1322, - "step": 2076 - }, - { - "epoch": 1.571996215704825, - "grad_norm": 2.1041505336761475, - "learning_rate": 9.435342186562582e-06, - "loss": 1.1237, - "step": 2077 - }, - { - "epoch": 1.5727530747398297, - "grad_norm": 2.247180461883545, - "learning_rate": 9.427327424916113e-06, - "loss": 1.056, - "step": 2078 - }, - { - "epoch": 1.5735099337748344, - "grad_norm": 2.184521436691284, - "learning_rate": 9.419313033050232e-06, - "loss": 1.1022, - "step": 2079 - }, - { - "epoch": 1.5742667928098393, - "grad_norm": 2.078411340713501, - "learning_rate": 9.411299016130902e-06, - "loss": 1.1526, - "step": 2080 - }, - { - "epoch": 1.575023651844844, - "grad_norm": 2.154078245162964, - "learning_rate": 9.403285379323833e-06, - "loss": 1.2138, - "step": 2081 - }, - { - "epoch": 1.5757805108798486, - "grad_norm": 2.0813803672790527, - "learning_rate": 9.395272127794491e-06, - "loss": 1.0913, - "step": 2082 - }, - { - "epoch": 1.5765373699148535, - "grad_norm": 1.975311517715454, - "learning_rate": 9.387259266708104e-06, - "loss": 1.1674, - "step": 2083 - }, - { - "epoch": 1.577294228949858, - "grad_norm": 2.022935152053833, - "learning_rate": 9.379246801229626e-06, - "loss": 1.0664, - "step": 2084 - }, - { - "epoch": 1.5780510879848628, - "grad_norm": 2.3060450553894043, - "learning_rate": 9.371234736523781e-06, - "loss": 1.0884, - "step": 2085 - }, - { - "epoch": 1.5788079470198677, - "grad_norm": 2.224121570587158, - "learning_rate": 9.36322307775502e-06, - 
"loss": 1.1056, - "step": 2086 - }, - { - "epoch": 1.5795648060548722, - "grad_norm": 2.4794466495513916, - "learning_rate": 9.35521183008754e-06, - "loss": 1.1189, - "step": 2087 - }, - { - "epoch": 1.580321665089877, - "grad_norm": 2.0150938034057617, - "learning_rate": 9.347200998685261e-06, - "loss": 1.1063, - "step": 2088 - }, - { - "epoch": 1.5810785241248817, - "grad_norm": 2.3067104816436768, - "learning_rate": 9.339190588711852e-06, - "loss": 1.1081, - "step": 2089 - }, - { - "epoch": 1.5818353831598864, - "grad_norm": 2.071730613708496, - "learning_rate": 9.331180605330695e-06, - "loss": 1.1256, - "step": 2090 - }, - { - "epoch": 1.5825922421948913, - "grad_norm": 2.099440097808838, - "learning_rate": 9.323171053704904e-06, - "loss": 1.1306, - "step": 2091 - }, - { - "epoch": 1.583349101229896, - "grad_norm": 2.1519389152526855, - "learning_rate": 9.315161938997315e-06, - "loss": 1.1495, - "step": 2092 - }, - { - "epoch": 1.5841059602649006, - "grad_norm": 2.1621830463409424, - "learning_rate": 9.30715326637048e-06, - "loss": 1.1637, - "step": 2093 - }, - { - "epoch": 1.5848628192999055, - "grad_norm": 2.2661333084106445, - "learning_rate": 9.299145040986674e-06, - "loss": 1.1102, - "step": 2094 - }, - { - "epoch": 1.5856196783349101, - "grad_norm": 2.2131712436676025, - "learning_rate": 9.291137268007863e-06, - "loss": 1.1336, - "step": 2095 - }, - { - "epoch": 1.5863765373699148, - "grad_norm": 2.1026811599731445, - "learning_rate": 9.283129952595747e-06, - "loss": 1.0903, - "step": 2096 - }, - { - "epoch": 1.5871333964049197, - "grad_norm": 2.0890021324157715, - "learning_rate": 9.275123099911719e-06, - "loss": 1.1232, - "step": 2097 - }, - { - "epoch": 1.5878902554399243, - "grad_norm": 2.1274547576904297, - "learning_rate": 9.267116715116866e-06, - "loss": 1.1067, - "step": 2098 - }, - { - "epoch": 1.588647114474929, - "grad_norm": 2.193621873855591, - "learning_rate": 9.259110803371987e-06, - "loss": 1.1304, - "step": 2099 - }, - { - "epoch": 1.589403973509934, - "grad_norm": 2.1822807788848877, - "learning_rate": 9.251105369837574e-06, - "loss": 1.1568, - "step": 2100 - }, - { - "epoch": 1.5901608325449383, - "grad_norm": 1.883682131767273, - "learning_rate": 9.243100419673798e-06, - "loss": 1.129, - "step": 2101 - }, - { - "epoch": 1.5909176915799432, - "grad_norm": 2.0487449169158936, - "learning_rate": 9.235095958040535e-06, - "loss": 1.1436, - "step": 2102 - }, - { - "epoch": 1.5916745506149481, - "grad_norm": 2.079259157180786, - "learning_rate": 9.22709199009734e-06, - "loss": 1.1538, - "step": 2103 - }, - { - "epoch": 1.5924314096499526, - "grad_norm": 2.1335606575012207, - "learning_rate": 9.219088521003444e-06, - "loss": 1.1106, - "step": 2104 - }, - { - "epoch": 1.5931882686849574, - "grad_norm": 3.516350507736206, - "learning_rate": 9.211085555917764e-06, - "loss": 1.1, - "step": 2105 - }, - { - "epoch": 1.593945127719962, - "grad_norm": 2.226984739303589, - "learning_rate": 9.203083099998885e-06, - "loss": 1.149, - "step": 2106 - }, - { - "epoch": 1.5947019867549668, - "grad_norm": 2.139308452606201, - "learning_rate": 9.195081158405074e-06, - "loss": 1.1636, - "step": 2107 - }, - { - "epoch": 1.5954588457899717, - "grad_norm": 2.194244146347046, - "learning_rate": 9.187079736294258e-06, - "loss": 1.1441, - "step": 2108 - }, - { - "epoch": 1.5962157048249763, - "grad_norm": 2.349120855331421, - "learning_rate": 9.179078838824029e-06, - "loss": 1.1093, - "step": 2109 - }, - { - "epoch": 1.596972563859981, - "grad_norm": 2.0828843116760254, - "learning_rate": 
9.171078471151646e-06, - "loss": 1.1357, - "step": 2110 - }, - { - "epoch": 1.5977294228949859, - "grad_norm": 2.10848331451416, - "learning_rate": 9.163078638434028e-06, - "loss": 1.1125, - "step": 2111 - }, - { - "epoch": 1.5984862819299905, - "grad_norm": 2.1101763248443604, - "learning_rate": 9.155079345827737e-06, - "loss": 1.0964, - "step": 2112 - }, - { - "epoch": 1.5992431409649952, - "grad_norm": 2.0837841033935547, - "learning_rate": 9.147080598488999e-06, - "loss": 1.1121, - "step": 2113 - }, - { - "epoch": 1.6, - "grad_norm": 1.9847743511199951, - "learning_rate": 9.13908240157369e-06, - "loss": 1.1153, - "step": 2114 - }, - { - "epoch": 1.6007568590350048, - "grad_norm": 2.0701873302459717, - "learning_rate": 9.131084760237314e-06, - "loss": 1.1046, - "step": 2115 - }, - { - "epoch": 1.6015137180700094, - "grad_norm": 2.1153228282928467, - "learning_rate": 9.123087679635039e-06, - "loss": 1.1126, - "step": 2116 - }, - { - "epoch": 1.6022705771050143, - "grad_norm": 2.0515284538269043, - "learning_rate": 9.115091164921654e-06, - "loss": 1.1266, - "step": 2117 - }, - { - "epoch": 1.603027436140019, - "grad_norm": 2.08406662940979, - "learning_rate": 9.107095221251597e-06, - "loss": 1.1355, - "step": 2118 - }, - { - "epoch": 1.6037842951750236, - "grad_norm": 2.6160190105438232, - "learning_rate": 9.099099853778927e-06, - "loss": 1.1226, - "step": 2119 - }, - { - "epoch": 1.6045411542100285, - "grad_norm": 2.024075984954834, - "learning_rate": 9.091105067657335e-06, - "loss": 1.0951, - "step": 2120 - }, - { - "epoch": 1.605298013245033, - "grad_norm": 1.9903373718261719, - "learning_rate": 9.083110868040142e-06, - "loss": 1.1485, - "step": 2121 - }, - { - "epoch": 1.6060548722800378, - "grad_norm": 2.307220458984375, - "learning_rate": 9.075117260080286e-06, - "loss": 1.1698, - "step": 2122 - }, - { - "epoch": 1.6068117313150427, - "grad_norm": 2.160867929458618, - "learning_rate": 9.067124248930324e-06, - "loss": 1.0852, - "step": 2123 - }, - { - "epoch": 1.6075685903500472, - "grad_norm": 2.255039691925049, - "learning_rate": 9.059131839742425e-06, - "loss": 1.1299, - "step": 2124 - }, - { - "epoch": 1.608325449385052, - "grad_norm": 2.122530698776245, - "learning_rate": 9.051140037668385e-06, - "loss": 1.1148, - "step": 2125 - }, - { - "epoch": 1.6090823084200567, - "grad_norm": 2.067059278488159, - "learning_rate": 9.043148847859588e-06, - "loss": 1.1335, - "step": 2126 - }, - { - "epoch": 1.6098391674550614, - "grad_norm": 3.3186850547790527, - "learning_rate": 9.035158275467037e-06, - "loss": 1.0892, - "step": 2127 - }, - { - "epoch": 1.6105960264900663, - "grad_norm": 2.4546923637390137, - "learning_rate": 9.02716832564133e-06, - "loss": 1.1939, - "step": 2128 - }, - { - "epoch": 1.611352885525071, - "grad_norm": 2.36734938621521, - "learning_rate": 9.01917900353267e-06, - "loss": 1.163, - "step": 2129 - }, - { - "epoch": 1.6121097445600756, - "grad_norm": 2.2100653648376465, - "learning_rate": 9.011190314290852e-06, - "loss": 1.0951, - "step": 2130 - }, - { - "epoch": 1.6128666035950805, - "grad_norm": 2.239097833633423, - "learning_rate": 9.003202263065263e-06, - "loss": 1.1554, - "step": 2131 - }, - { - "epoch": 1.6136234626300852, - "grad_norm": 2.2774319648742676, - "learning_rate": 8.995214855004877e-06, - "loss": 1.1237, - "step": 2132 - }, - { - "epoch": 1.6143803216650898, - "grad_norm": 2.1328752040863037, - "learning_rate": 8.987228095258256e-06, - "loss": 1.1154, - "step": 2133 - }, - { - "epoch": 1.6151371807000947, - "grad_norm": 2.3373916149139404, - 
"learning_rate": 8.979241988973546e-06, - "loss": 1.1058, - "step": 2134 - }, - { - "epoch": 1.6158940397350994, - "grad_norm": 2.126988172531128, - "learning_rate": 8.971256541298468e-06, - "loss": 1.1709, - "step": 2135 - }, - { - "epoch": 1.616650898770104, - "grad_norm": 2.3820157051086426, - "learning_rate": 8.963271757380319e-06, - "loss": 1.1332, - "step": 2136 - }, - { - "epoch": 1.617407757805109, - "grad_norm": 2.0995140075683594, - "learning_rate": 8.955287642365969e-06, - "loss": 1.1341, - "step": 2137 - }, - { - "epoch": 1.6181646168401134, - "grad_norm": 2.2463080883026123, - "learning_rate": 8.94730420140186e-06, - "loss": 1.1455, - "step": 2138 - }, - { - "epoch": 1.6189214758751183, - "grad_norm": 2.293729066848755, - "learning_rate": 8.939321439633991e-06, - "loss": 1.103, - "step": 2139 - }, - { - "epoch": 1.6196783349101231, - "grad_norm": 2.1475143432617188, - "learning_rate": 8.931339362207931e-06, - "loss": 1.124, - "step": 2140 - }, - { - "epoch": 1.6204351939451276, - "grad_norm": 2.087843179702759, - "learning_rate": 8.923357974268806e-06, - "loss": 1.1743, - "step": 2141 - }, - { - "epoch": 1.6211920529801325, - "grad_norm": 2.0908894538879395, - "learning_rate": 8.915377280961298e-06, - "loss": 1.0961, - "step": 2142 - }, - { - "epoch": 1.6219489120151371, - "grad_norm": 2.312263250350952, - "learning_rate": 8.907397287429635e-06, - "loss": 1.1523, - "step": 2143 - }, - { - "epoch": 1.6227057710501418, - "grad_norm": 2.30190110206604, - "learning_rate": 8.899417998817605e-06, - "loss": 1.1319, - "step": 2144 - }, - { - "epoch": 1.6234626300851467, - "grad_norm": 2.0427803993225098, - "learning_rate": 8.891439420268534e-06, - "loss": 1.1007, - "step": 2145 - }, - { - "epoch": 1.6242194891201513, - "grad_norm": 2.142066717147827, - "learning_rate": 8.88346155692529e-06, - "loss": 1.1465, - "step": 2146 - }, - { - "epoch": 1.624976348155156, - "grad_norm": 1.9997434616088867, - "learning_rate": 8.875484413930283e-06, - "loss": 1.0966, - "step": 2147 - }, - { - "epoch": 1.625733207190161, - "grad_norm": 2.0591166019439697, - "learning_rate": 8.86750799642546e-06, - "loss": 1.1409, - "step": 2148 - }, - { - "epoch": 1.6264900662251656, - "grad_norm": 2.044402599334717, - "learning_rate": 8.859532309552298e-06, - "loss": 1.1145, - "step": 2149 - }, - { - "epoch": 1.6272469252601702, - "grad_norm": 2.3767645359039307, - "learning_rate": 8.8515573584518e-06, - "loss": 1.1278, - "step": 2150 - }, - { - "epoch": 1.6280037842951751, - "grad_norm": 2.088170289993286, - "learning_rate": 8.843583148264496e-06, - "loss": 1.1607, - "step": 2151 - }, - { - "epoch": 1.6287606433301798, - "grad_norm": 2.026031255722046, - "learning_rate": 8.835609684130448e-06, - "loss": 1.1173, - "step": 2152 - }, - { - "epoch": 1.6295175023651844, - "grad_norm": 2.108065366744995, - "learning_rate": 8.827636971189222e-06, - "loss": 1.1735, - "step": 2153 - }, - { - "epoch": 1.6302743614001893, - "grad_norm": 2.12156081199646, - "learning_rate": 8.819665014579911e-06, - "loss": 1.0851, - "step": 2154 - }, - { - "epoch": 1.631031220435194, - "grad_norm": 2.0895984172821045, - "learning_rate": 8.81169381944112e-06, - "loss": 1.1261, - "step": 2155 - }, - { - "epoch": 1.6317880794701987, - "grad_norm": 2.119001865386963, - "learning_rate": 8.803723390910951e-06, - "loss": 1.1236, - "step": 2156 - }, - { - "epoch": 1.6325449385052035, - "grad_norm": 2.0929312705993652, - "learning_rate": 8.795753734127024e-06, - "loss": 1.157, - "step": 2157 - }, - { - "epoch": 1.633301797540208, - "grad_norm": 
2.0275888442993164, - "learning_rate": 8.787784854226465e-06, - "loss": 1.1407, - "step": 2158 - }, - { - "epoch": 1.6340586565752129, - "grad_norm": 2.332402467727661, - "learning_rate": 8.779816756345884e-06, - "loss": 1.0916, - "step": 2159 - }, - { - "epoch": 1.6348155156102178, - "grad_norm": 2.0872373580932617, - "learning_rate": 8.7718494456214e-06, - "loss": 1.1492, - "step": 2160 - }, - { - "epoch": 1.6355723746452222, - "grad_norm": 2.1566085815429688, - "learning_rate": 8.763882927188615e-06, - "loss": 1.1397, - "step": 2161 - }, - { - "epoch": 1.636329233680227, - "grad_norm": 2.134572744369507, - "learning_rate": 8.75591720618263e-06, - "loss": 1.0967, - "step": 2162 - }, - { - "epoch": 1.6370860927152318, - "grad_norm": 2.061708450317383, - "learning_rate": 8.74795228773803e-06, - "loss": 1.0709, - "step": 2163 - }, - { - "epoch": 1.6378429517502364, - "grad_norm": 2.04203724861145, - "learning_rate": 8.739988176988869e-06, - "loss": 1.0671, - "step": 2164 - }, - { - "epoch": 1.6385998107852413, - "grad_norm": 2.0386204719543457, - "learning_rate": 8.732024879068702e-06, - "loss": 1.1021, - "step": 2165 - }, - { - "epoch": 1.639356669820246, - "grad_norm": 2.104109764099121, - "learning_rate": 8.724062399110547e-06, - "loss": 1.0964, - "step": 2166 - }, - { - "epoch": 1.6401135288552506, - "grad_norm": 2.078735113143921, - "learning_rate": 8.716100742246894e-06, - "loss": 1.1241, - "step": 2167 - }, - { - "epoch": 1.6408703878902555, - "grad_norm": 2.1530871391296387, - "learning_rate": 8.708139913609705e-06, - "loss": 1.118, - "step": 2168 - }, - { - "epoch": 1.6416272469252602, - "grad_norm": 2.1105563640594482, - "learning_rate": 8.700179918330419e-06, - "loss": 1.0883, - "step": 2169 - }, - { - "epoch": 1.6423841059602649, - "grad_norm": 2.056195020675659, - "learning_rate": 8.692220761539912e-06, - "loss": 1.1549, - "step": 2170 - }, - { - "epoch": 1.6431409649952697, - "grad_norm": 2.327533006668091, - "learning_rate": 8.684262448368546e-06, - "loss": 1.1097, - "step": 2171 - }, - { - "epoch": 1.6438978240302744, - "grad_norm": 2.111985206604004, - "learning_rate": 8.676304983946122e-06, - "loss": 1.1048, - "step": 2172 - }, - { - "epoch": 1.644654683065279, - "grad_norm": 2.1778697967529297, - "learning_rate": 8.668348373401908e-06, - "loss": 1.1644, - "step": 2173 - }, - { - "epoch": 1.645411542100284, - "grad_norm": 2.3045222759246826, - "learning_rate": 8.660392621864608e-06, - "loss": 1.1873, - "step": 2174 - }, - { - "epoch": 1.6461684011352884, - "grad_norm": 2.2667534351348877, - "learning_rate": 8.652437734462377e-06, - "loss": 1.0519, - "step": 2175 - }, - { - "epoch": 1.6469252601702933, - "grad_norm": 2.394404888153076, - "learning_rate": 8.644483716322818e-06, - "loss": 1.1324, - "step": 2176 - }, - { - "epoch": 1.6476821192052982, - "grad_norm": 2.009328603744507, - "learning_rate": 8.63653057257297e-06, - "loss": 1.147, - "step": 2177 - }, - { - "epoch": 1.6484389782403026, - "grad_norm": 2.072662591934204, - "learning_rate": 8.6285783083393e-06, - "loss": 1.1435, - "step": 2178 - }, - { - "epoch": 1.6491958372753075, - "grad_norm": 2.184267044067383, - "learning_rate": 8.620626928747725e-06, - "loss": 1.1896, - "step": 2179 - }, - { - "epoch": 1.6499526963103122, - "grad_norm": 2.0765388011932373, - "learning_rate": 8.612676438923587e-06, - "loss": 1.125, - "step": 2180 - }, - { - "epoch": 1.6507095553453168, - "grad_norm": 2.2719030380249023, - "learning_rate": 8.604726843991637e-06, - "loss": 1.074, - "step": 2181 - }, - { - "epoch": 
1.6514664143803217, - "grad_norm": 2.0793111324310303, - "learning_rate": 8.596778149076073e-06, - "loss": 1.099, - "step": 2182 - }, - { - "epoch": 1.6522232734153264, - "grad_norm": 1.9967988729476929, - "learning_rate": 8.588830359300499e-06, - "loss": 1.1454, - "step": 2183 - }, - { - "epoch": 1.652980132450331, - "grad_norm": 2.3746399879455566, - "learning_rate": 8.580883479787936e-06, - "loss": 1.1424, - "step": 2184 - }, - { - "epoch": 1.653736991485336, - "grad_norm": 2.0627639293670654, - "learning_rate": 8.57293751566083e-06, - "loss": 1.1052, - "step": 2185 - }, - { - "epoch": 1.6544938505203406, - "grad_norm": 2.196162700653076, - "learning_rate": 8.564992472041021e-06, - "loss": 1.1002, - "step": 2186 - }, - { - "epoch": 1.6552507095553453, - "grad_norm": 2.3567469120025635, - "learning_rate": 8.557048354049763e-06, - "loss": 1.1426, - "step": 2187 - }, - { - "epoch": 1.6560075685903501, - "grad_norm": 2.0467660427093506, - "learning_rate": 8.549105166807716e-06, - "loss": 1.0916, - "step": 2188 - }, - { - "epoch": 1.6567644276253548, - "grad_norm": 2.107483386993408, - "learning_rate": 8.541162915434935e-06, - "loss": 1.1226, - "step": 2189 - }, - { - "epoch": 1.6575212866603595, - "grad_norm": 2.2864937782287598, - "learning_rate": 8.533221605050878e-06, - "loss": 1.1246, - "step": 2190 - }, - { - "epoch": 1.6582781456953644, - "grad_norm": 2.135864734649658, - "learning_rate": 8.525281240774391e-06, - "loss": 1.0364, - "step": 2191 - }, - { - "epoch": 1.659035004730369, - "grad_norm": 2.136951446533203, - "learning_rate": 8.517341827723709e-06, - "loss": 1.1753, - "step": 2192 - }, - { - "epoch": 1.6597918637653737, - "grad_norm": 2.1684775352478027, - "learning_rate": 8.509403371016462e-06, - "loss": 1.0812, - "step": 2193 - }, - { - "epoch": 1.6605487228003786, - "grad_norm": 2.195051431655884, - "learning_rate": 8.501465875769652e-06, - "loss": 1.0883, - "step": 2194 - }, - { - "epoch": 1.661305581835383, - "grad_norm": 2.2236487865448, - "learning_rate": 8.493529347099669e-06, - "loss": 1.0637, - "step": 2195 - }, - { - "epoch": 1.662062440870388, - "grad_norm": 2.1652839183807373, - "learning_rate": 8.48559379012228e-06, - "loss": 1.1285, - "step": 2196 - }, - { - "epoch": 1.6628192999053926, - "grad_norm": 2.3312926292419434, - "learning_rate": 8.477659209952627e-06, - "loss": 1.1303, - "step": 2197 - }, - { - "epoch": 1.6635761589403972, - "grad_norm": 2.1476340293884277, - "learning_rate": 8.46972561170521e-06, - "loss": 1.0911, - "step": 2198 - }, - { - "epoch": 1.6643330179754021, - "grad_norm": 2.205261707305908, - "learning_rate": 8.461793000493917e-06, - "loss": 1.1075, - "step": 2199 - }, - { - "epoch": 1.6650898770104068, - "grad_norm": 2.3584821224212646, - "learning_rate": 8.45386138143198e-06, - "loss": 1.1429, - "step": 2200 - }, - { - "epoch": 1.6658467360454114, - "grad_norm": 1.990213394165039, - "learning_rate": 8.445930759632e-06, - "loss": 1.0731, - "step": 2201 - }, - { - "epoch": 1.6666035950804163, - "grad_norm": 2.114382266998291, - "learning_rate": 8.43800114020594e-06, - "loss": 1.1304, - "step": 2202 - }, - { - "epoch": 1.667360454115421, - "grad_norm": 2.6425230503082275, - "learning_rate": 8.430072528265107e-06, - "loss": 1.1223, - "step": 2203 - }, - { - "epoch": 1.6681173131504257, - "grad_norm": 2.238675594329834, - "learning_rate": 8.422144928920168e-06, - "loss": 1.1187, - "step": 2204 - }, - { - "epoch": 1.6688741721854305, - "grad_norm": 2.0409348011016846, - "learning_rate": 8.414218347281127e-06, - "loss": 1.0912, - "step": 2205 - 
}, - { - "epoch": 1.6696310312204352, - "grad_norm": 2.5583693981170654, - "learning_rate": 8.406292788457338e-06, - "loss": 1.1433, - "step": 2206 - }, - { - "epoch": 1.6703878902554399, - "grad_norm": 2.24996018409729, - "learning_rate": 8.398368257557505e-06, - "loss": 1.1177, - "step": 2207 - }, - { - "epoch": 1.6711447492904448, - "grad_norm": 2.2110371589660645, - "learning_rate": 8.390444759689646e-06, - "loss": 1.1334, - "step": 2208 - }, - { - "epoch": 1.6719016083254494, - "grad_norm": 2.0102930068969727, - "learning_rate": 8.382522299961135e-06, - "loss": 1.0807, - "step": 2209 - }, - { - "epoch": 1.672658467360454, - "grad_norm": 2.319051504135132, - "learning_rate": 8.37460088347867e-06, - "loss": 1.0997, - "step": 2210 - }, - { - "epoch": 1.673415326395459, - "grad_norm": 2.3122832775115967, - "learning_rate": 8.36668051534827e-06, - "loss": 1.1591, - "step": 2211 - }, - { - "epoch": 1.6741721854304634, - "grad_norm": 2.39446759223938, - "learning_rate": 8.358761200675284e-06, - "loss": 1.1201, - "step": 2212 - }, - { - "epoch": 1.6749290444654683, - "grad_norm": 2.257894515991211, - "learning_rate": 8.350842944564386e-06, - "loss": 1.1094, - "step": 2213 - }, - { - "epoch": 1.6756859035004732, - "grad_norm": 2.2300925254821777, - "learning_rate": 8.34292575211956e-06, - "loss": 1.1613, - "step": 2214 - }, - { - "epoch": 1.6764427625354776, - "grad_norm": 2.2363715171813965, - "learning_rate": 8.33500962844411e-06, - "loss": 1.1485, - "step": 2215 - }, - { - "epoch": 1.6771996215704825, - "grad_norm": 2.0160231590270996, - "learning_rate": 8.327094578640643e-06, - "loss": 1.1136, - "step": 2216 - }, - { - "epoch": 1.6779564806054872, - "grad_norm": 2.279360294342041, - "learning_rate": 8.319180607811085e-06, - "loss": 1.1193, - "step": 2217 - }, - { - "epoch": 1.6787133396404919, - "grad_norm": 2.280641555786133, - "learning_rate": 8.31126772105666e-06, - "loss": 1.1655, - "step": 2218 - }, - { - "epoch": 1.6794701986754967, - "grad_norm": 2.077263832092285, - "learning_rate": 8.303355923477889e-06, - "loss": 1.1435, - "step": 2219 - }, - { - "epoch": 1.6802270577105014, - "grad_norm": 2.271101236343384, - "learning_rate": 8.295445220174604e-06, - "loss": 1.0986, - "step": 2220 - }, - { - "epoch": 1.680983916745506, - "grad_norm": 2.257680892944336, - "learning_rate": 8.28753561624592e-06, - "loss": 1.095, - "step": 2221 - }, - { - "epoch": 1.681740775780511, - "grad_norm": 2.18369722366333, - "learning_rate": 8.279627116790244e-06, - "loss": 1.1007, - "step": 2222 - }, - { - "epoch": 1.6824976348155156, - "grad_norm": 2.0165445804595947, - "learning_rate": 8.271719726905275e-06, - "loss": 1.1165, - "step": 2223 - }, - { - "epoch": 1.6832544938505203, - "grad_norm": 2.2388248443603516, - "learning_rate": 8.263813451688006e-06, - "loss": 1.1186, - "step": 2224 - }, - { - "epoch": 1.6840113528855252, - "grad_norm": 2.32745099067688, - "learning_rate": 8.255908296234688e-06, - "loss": 1.1812, - "step": 2225 - }, - { - "epoch": 1.6847682119205298, - "grad_norm": 2.361375093460083, - "learning_rate": 8.248004265640868e-06, - "loss": 1.1649, - "step": 2226 - }, - { - "epoch": 1.6855250709555345, - "grad_norm": 2.2414417266845703, - "learning_rate": 8.240101365001368e-06, - "loss": 1.1013, - "step": 2227 - }, - { - "epoch": 1.6862819299905394, - "grad_norm": 1.9859745502471924, - "learning_rate": 8.232199599410273e-06, - "loss": 1.1295, - "step": 2228 - }, - { - "epoch": 1.6870387890255438, - "grad_norm": 2.1757733821868896, - "learning_rate": 8.22429897396094e-06, - "loss": 1.1223, 
- "step": 2229 - }, - { - "epoch": 1.6877956480605487, - "grad_norm": 2.36989164352417, - "learning_rate": 8.216399493745992e-06, - "loss": 1.1337, - "step": 2230 - }, - { - "epoch": 1.6885525070955536, - "grad_norm": 1.9508718252182007, - "learning_rate": 8.208501163857318e-06, - "loss": 1.1351, - "step": 2231 - }, - { - "epoch": 1.689309366130558, - "grad_norm": 2.057548999786377, - "learning_rate": 8.200603989386055e-06, - "loss": 1.1382, - "step": 2232 - }, - { - "epoch": 1.690066225165563, - "grad_norm": 2.3355371952056885, - "learning_rate": 8.192707975422604e-06, - "loss": 1.1393, - "step": 2233 - }, - { - "epoch": 1.6908230842005676, - "grad_norm": 2.1525468826293945, - "learning_rate": 8.184813127056616e-06, - "loss": 1.1665, - "step": 2234 - }, - { - "epoch": 1.6915799432355723, - "grad_norm": 2.2342827320098877, - "learning_rate": 8.176919449376989e-06, - "loss": 1.1385, - "step": 2235 - }, - { - "epoch": 1.6923368022705771, - "grad_norm": 2.1949453353881836, - "learning_rate": 8.169026947471866e-06, - "loss": 1.1635, - "step": 2236 - }, - { - "epoch": 1.6930936613055818, - "grad_norm": 2.183218240737915, - "learning_rate": 8.161135626428633e-06, - "loss": 1.1433, - "step": 2237 - }, - { - "epoch": 1.6938505203405865, - "grad_norm": 2.1611173152923584, - "learning_rate": 8.153245491333922e-06, - "loss": 1.1083, - "step": 2238 - }, - { - "epoch": 1.6946073793755914, - "grad_norm": 2.0343189239501953, - "learning_rate": 8.145356547273584e-06, - "loss": 1.1334, - "step": 2239 - }, - { - "epoch": 1.695364238410596, - "grad_norm": 2.0303726196289062, - "learning_rate": 8.13746879933272e-06, - "loss": 1.1545, - "step": 2240 - }, - { - "epoch": 1.6961210974456007, - "grad_norm": 2.152381658554077, - "learning_rate": 8.129582252595645e-06, - "loss": 1.1316, - "step": 2241 - }, - { - "epoch": 1.6968779564806056, - "grad_norm": 2.0682196617126465, - "learning_rate": 8.12169691214591e-06, - "loss": 1.1396, - "step": 2242 - }, - { - "epoch": 1.6976348155156102, - "grad_norm": 2.07098388671875, - "learning_rate": 8.113812783066288e-06, - "loss": 1.0784, - "step": 2243 - }, - { - "epoch": 1.698391674550615, - "grad_norm": 2.1533405780792236, - "learning_rate": 8.105929870438762e-06, - "loss": 1.1151, - "step": 2244 - }, - { - "epoch": 1.6991485335856198, - "grad_norm": 2.348604679107666, - "learning_rate": 8.098048179344545e-06, - "loss": 1.0913, - "step": 2245 - }, - { - "epoch": 1.6999053926206245, - "grad_norm": 2.0196192264556885, - "learning_rate": 8.090167714864051e-06, - "loss": 1.1334, - "step": 2246 - }, - { - "epoch": 1.7006622516556291, - "grad_norm": 2.2407851219177246, - "learning_rate": 8.082288482076904e-06, - "loss": 1.1362, - "step": 2247 - }, - { - "epoch": 1.701419110690634, - "grad_norm": 2.0429224967956543, - "learning_rate": 8.074410486061943e-06, - "loss": 1.0377, - "step": 2248 - }, - { - "epoch": 1.7021759697256384, - "grad_norm": 2.149394989013672, - "learning_rate": 8.066533731897202e-06, - "loss": 1.1324, - "step": 2249 - }, - { - "epoch": 1.7029328287606433, - "grad_norm": 2.5104711055755615, - "learning_rate": 8.058658224659914e-06, - "loss": 1.1037, - "step": 2250 - }, - { - "epoch": 1.7036896877956482, - "grad_norm": 1.9572622776031494, - "learning_rate": 8.050783969426517e-06, - "loss": 1.1231, - "step": 2251 - }, - { - "epoch": 1.7044465468306527, - "grad_norm": 2.344362497329712, - "learning_rate": 8.042910971272627e-06, - "loss": 1.1054, - "step": 2252 - }, - { - "epoch": 1.7052034058656576, - "grad_norm": 2.160923719406128, - "learning_rate": 
8.035039235273063e-06, - "loss": 1.1429, - "step": 2253 - }, - { - "epoch": 1.7059602649006622, - "grad_norm": 2.273373603820801, - "learning_rate": 8.027168766501831e-06, - "loss": 1.1073, - "step": 2254 - }, - { - "epoch": 1.7067171239356669, - "grad_norm": 2.043576955795288, - "learning_rate": 8.019299570032108e-06, - "loss": 1.1326, - "step": 2255 - }, - { - "epoch": 1.7074739829706718, - "grad_norm": 2.3227038383483887, - "learning_rate": 8.011431650936259e-06, - "loss": 1.0762, - "step": 2256 - }, - { - "epoch": 1.7082308420056764, - "grad_norm": 2.2618634700775146, - "learning_rate": 8.003565014285829e-06, - "loss": 1.1246, - "step": 2257 - }, - { - "epoch": 1.708987701040681, - "grad_norm": 2.072833776473999, - "learning_rate": 7.99569966515153e-06, - "loss": 1.128, - "step": 2258 - }, - { - "epoch": 1.709744560075686, - "grad_norm": 2.305095911026001, - "learning_rate": 7.987835608603241e-06, - "loss": 1.0533, - "step": 2259 - }, - { - "epoch": 1.7105014191106906, - "grad_norm": 2.25026535987854, - "learning_rate": 7.979972849710022e-06, - "loss": 1.1497, - "step": 2260 - }, - { - "epoch": 1.7112582781456953, - "grad_norm": 2.259713888168335, - "learning_rate": 7.972111393540079e-06, - "loss": 1.1364, - "step": 2261 - }, - { - "epoch": 1.7120151371807002, - "grad_norm": 2.3140814304351807, - "learning_rate": 7.964251245160795e-06, - "loss": 1.1363, - "step": 2262 - }, - { - "epoch": 1.7127719962157049, - "grad_norm": 2.25529408454895, - "learning_rate": 7.956392409638693e-06, - "loss": 1.1304, - "step": 2263 - }, - { - "epoch": 1.7135288552507095, - "grad_norm": 2.118211030960083, - "learning_rate": 7.948534892039462e-06, - "loss": 1.1227, - "step": 2264 - }, - { - "epoch": 1.7142857142857144, - "grad_norm": 2.260540008544922, - "learning_rate": 7.940678697427945e-06, - "loss": 1.0716, - "step": 2265 - }, - { - "epoch": 1.7150425733207189, - "grad_norm": 2.334322690963745, - "learning_rate": 7.932823830868114e-06, - "loss": 1.1458, - "step": 2266 - }, - { - "epoch": 1.7157994323557237, - "grad_norm": 2.1086597442626953, - "learning_rate": 7.9249702974231e-06, - "loss": 1.1264, - "step": 2267 - }, - { - "epoch": 1.7165562913907286, - "grad_norm": 2.286928176879883, - "learning_rate": 7.917118102155175e-06, - "loss": 1.1079, - "step": 2268 - }, - { - "epoch": 1.717313150425733, - "grad_norm": 2.132174491882324, - "learning_rate": 7.909267250125743e-06, - "loss": 1.1201, - "step": 2269 - }, - { - "epoch": 1.718070009460738, - "grad_norm": 2.0687386989593506, - "learning_rate": 7.901417746395338e-06, - "loss": 1.0981, - "step": 2270 - }, - { - "epoch": 1.7188268684957426, - "grad_norm": 2.2579548358917236, - "learning_rate": 7.893569596023638e-06, - "loss": 1.1193, - "step": 2271 - }, - { - "epoch": 1.7195837275307473, - "grad_norm": 2.1385035514831543, - "learning_rate": 7.885722804069435e-06, - "loss": 1.1826, - "step": 2272 - }, - { - "epoch": 1.7203405865657522, - "grad_norm": 1.9475092887878418, - "learning_rate": 7.877877375590657e-06, - "loss": 1.101, - "step": 2273 - }, - { - "epoch": 1.7210974456007568, - "grad_norm": 2.2321999073028564, - "learning_rate": 7.87003331564434e-06, - "loss": 1.121, - "step": 2274 - }, - { - "epoch": 1.7218543046357615, - "grad_norm": 2.091757297515869, - "learning_rate": 7.86219062928665e-06, - "loss": 1.0453, - "step": 2275 - }, - { - "epoch": 1.7226111636707664, - "grad_norm": 1.965161681175232, - "learning_rate": 7.854349321572868e-06, - "loss": 1.1628, - "step": 2276 - }, - { - "epoch": 1.723368022705771, - "grad_norm": 2.090461492538452, - 
"learning_rate": 7.846509397557372e-06, - "loss": 1.1603, - "step": 2277 - }, - { - "epoch": 1.7241248817407757, - "grad_norm": 2.0913045406341553, - "learning_rate": 7.83867086229366e-06, - "loss": 1.186, - "step": 2278 - }, - { - "epoch": 1.7248817407757806, - "grad_norm": 2.137932300567627, - "learning_rate": 7.83083372083434e-06, - "loss": 1.129, - "step": 2279 - }, - { - "epoch": 1.7256385998107853, - "grad_norm": 2.1641454696655273, - "learning_rate": 7.822997978231101e-06, - "loss": 1.1261, - "step": 2280 - }, - { - "epoch": 1.72639545884579, - "grad_norm": 1.977129340171814, - "learning_rate": 7.815163639534752e-06, - "loss": 1.1332, - "step": 2281 - }, - { - "epoch": 1.7271523178807948, - "grad_norm": 2.3945367336273193, - "learning_rate": 7.807330709795191e-06, - "loss": 1.0711, - "step": 2282 - }, - { - "epoch": 1.7279091769157995, - "grad_norm": 1.9358054399490356, - "learning_rate": 7.799499194061395e-06, - "loss": 1.0986, - "step": 2283 - }, - { - "epoch": 1.7286660359508041, - "grad_norm": 2.242386817932129, - "learning_rate": 7.791669097381447e-06, - "loss": 1.1113, - "step": 2284 - }, - { - "epoch": 1.729422894985809, - "grad_norm": 2.1633381843566895, - "learning_rate": 7.783840424802504e-06, - "loss": 1.1581, - "step": 2285 - }, - { - "epoch": 1.7301797540208135, - "grad_norm": 2.0929739475250244, - "learning_rate": 7.776013181370813e-06, - "loss": 1.1479, - "step": 2286 - }, - { - "epoch": 1.7309366130558184, - "grad_norm": 2.2305848598480225, - "learning_rate": 7.768187372131693e-06, - "loss": 1.1683, - "step": 2287 - }, - { - "epoch": 1.7316934720908232, - "grad_norm": 1.938439130783081, - "learning_rate": 7.76036300212954e-06, - "loss": 1.0774, - "step": 2288 - }, - { - "epoch": 1.7324503311258277, - "grad_norm": 2.031890869140625, - "learning_rate": 7.752540076407829e-06, - "loss": 1.0838, - "step": 2289 - }, - { - "epoch": 1.7332071901608326, - "grad_norm": 2.242338180541992, - "learning_rate": 7.744718600009093e-06, - "loss": 1.1161, - "step": 2290 - }, - { - "epoch": 1.7339640491958372, - "grad_norm": 2.1008989810943604, - "learning_rate": 7.736898577974936e-06, - "loss": 1.071, - "step": 2291 - }, - { - "epoch": 1.734720908230842, - "grad_norm": 1.9958288669586182, - "learning_rate": 7.72908001534603e-06, - "loss": 1.0919, - "step": 2292 - }, - { - "epoch": 1.7354777672658468, - "grad_norm": 2.2547950744628906, - "learning_rate": 7.7212629171621e-06, - "loss": 1.1024, - "step": 2293 - }, - { - "epoch": 1.7362346263008515, - "grad_norm": 2.198230743408203, - "learning_rate": 7.713447288461922e-06, - "loss": 1.1567, - "step": 2294 - }, - { - "epoch": 1.7369914853358561, - "grad_norm": 2.047135591506958, - "learning_rate": 7.705633134283342e-06, - "loss": 1.1345, - "step": 2295 - }, - { - "epoch": 1.737748344370861, - "grad_norm": 2.021092176437378, - "learning_rate": 7.697820459663234e-06, - "loss": 1.0968, - "step": 2296 - }, - { - "epoch": 1.7385052034058657, - "grad_norm": 2.04164719581604, - "learning_rate": 7.690009269637535e-06, - "loss": 1.1234, - "step": 2297 - }, - { - "epoch": 1.7392620624408703, - "grad_norm": 2.0042457580566406, - "learning_rate": 7.68219956924122e-06, - "loss": 1.1361, - "step": 2298 - }, - { - "epoch": 1.7400189214758752, - "grad_norm": 2.207336902618408, - "learning_rate": 7.674391363508293e-06, - "loss": 1.1241, - "step": 2299 - }, - { - "epoch": 1.7407757805108799, - "grad_norm": 2.4397289752960205, - "learning_rate": 7.666584657471819e-06, - "loss": 1.1499, - "step": 2300 - }, - { - "epoch": 1.7415326395458846, - "grad_norm": 
1.9008210897445679, - "learning_rate": 7.65877945616387e-06, - "loss": 1.075, - "step": 2301 - }, - { - "epoch": 1.7422894985808894, - "grad_norm": 2.0731241703033447, - "learning_rate": 7.650975764615564e-06, - "loss": 1.1104, - "step": 2302 - }, - { - "epoch": 1.7430463576158939, - "grad_norm": 2.0302274227142334, - "learning_rate": 7.643173587857043e-06, - "loss": 1.1129, - "step": 2303 - }, - { - "epoch": 1.7438032166508988, - "grad_norm": 2.3482978343963623, - "learning_rate": 7.635372930917471e-06, - "loss": 1.1239, - "step": 2304 - }, - { - "epoch": 1.7445600756859037, - "grad_norm": 2.1537070274353027, - "learning_rate": 7.627573798825028e-06, - "loss": 1.1213, - "step": 2305 - }, - { - "epoch": 1.745316934720908, - "grad_norm": 3.2694427967071533, - "learning_rate": 7.619776196606923e-06, - "loss": 1.1201, - "step": 2306 - }, - { - "epoch": 1.746073793755913, - "grad_norm": 1.9938985109329224, - "learning_rate": 7.611980129289362e-06, - "loss": 1.1202, - "step": 2307 - }, - { - "epoch": 1.7468306527909176, - "grad_norm": 1.9866852760314941, - "learning_rate": 7.604185601897578e-06, - "loss": 1.1716, - "step": 2308 - }, - { - "epoch": 1.7475875118259223, - "grad_norm": 2.149052143096924, - "learning_rate": 7.596392619455805e-06, - "loss": 1.0911, - "step": 2309 - }, - { - "epoch": 1.7483443708609272, - "grad_norm": 2.1124277114868164, - "learning_rate": 7.588601186987277e-06, - "loss": 1.1686, - "step": 2310 - }, - { - "epoch": 1.7491012298959319, - "grad_norm": 2.1678647994995117, - "learning_rate": 7.5808113095142334e-06, - "loss": 1.1356, - "step": 2311 - }, - { - "epoch": 1.7498580889309365, - "grad_norm": 2.2444238662719727, - "learning_rate": 7.573022992057911e-06, - "loss": 1.1302, - "step": 2312 - }, - { - "epoch": 1.7506149479659414, - "grad_norm": 2.296766757965088, - "learning_rate": 7.565236239638542e-06, - "loss": 1.1317, - "step": 2313 - }, - { - "epoch": 1.751371807000946, - "grad_norm": 2.106170177459717, - "learning_rate": 7.557451057275346e-06, - "loss": 1.138, - "step": 2314 - }, - { - "epoch": 1.7521286660359507, - "grad_norm": 1.8964704275131226, - "learning_rate": 7.549667449986533e-06, - "loss": 1.1121, - "step": 2315 - }, - { - "epoch": 1.7528855250709556, - "grad_norm": 2.2497787475585938, - "learning_rate": 7.541885422789297e-06, - "loss": 1.1607, - "step": 2316 - }, - { - "epoch": 1.7536423841059603, - "grad_norm": 1.980980634689331, - "learning_rate": 7.53410498069982e-06, - "loss": 1.0806, - "step": 2317 - }, - { - "epoch": 1.754399243140965, - "grad_norm": 2.030378818511963, - "learning_rate": 7.526326128733247e-06, - "loss": 1.1, - "step": 2318 - }, - { - "epoch": 1.7551561021759698, - "grad_norm": 1.9196511507034302, - "learning_rate": 7.5185488719037105e-06, - "loss": 1.1771, - "step": 2319 - }, - { - "epoch": 1.7559129612109745, - "grad_norm": 1.9450955390930176, - "learning_rate": 7.510773215224318e-06, - "loss": 1.1347, - "step": 2320 - }, - { - "epoch": 1.7566698202459792, - "grad_norm": 1.9371559619903564, - "learning_rate": 7.502999163707131e-06, - "loss": 1.1026, - "step": 2321 - }, - { - "epoch": 1.757426679280984, - "grad_norm": 2.021090269088745, - "learning_rate": 7.4952267223631865e-06, - "loss": 1.1514, - "step": 2322 - }, - { - "epoch": 1.7581835383159885, - "grad_norm": 2.016483783721924, - "learning_rate": 7.487455896202487e-06, - "loss": 1.1315, - "step": 2323 - }, - { - "epoch": 1.7589403973509934, - "grad_norm": 2.051363945007324, - "learning_rate": 7.479686690233981e-06, - "loss": 1.1094, - "step": 2324 - }, - { - "epoch": 
1.759697256385998, - "grad_norm": 2.3509905338287354, - "learning_rate": 7.471919109465584e-06, - "loss": 1.1104, - "step": 2325 - }, - { - "epoch": 1.7604541154210027, - "grad_norm": 2.0247390270233154, - "learning_rate": 7.46415315890416e-06, - "loss": 1.1122, - "step": 2326 - }, - { - "epoch": 1.7612109744560076, - "grad_norm": 2.1923465728759766, - "learning_rate": 7.456388843555525e-06, - "loss": 1.1308, - "step": 2327 - }, - { - "epoch": 1.7619678334910123, - "grad_norm": 2.132502555847168, - "learning_rate": 7.448626168424434e-06, - "loss": 1.1637, - "step": 2328 - }, - { - "epoch": 1.762724692526017, - "grad_norm": 1.9766474962234497, - "learning_rate": 7.440865138514587e-06, - "loss": 1.1019, - "step": 2329 - }, - { - "epoch": 1.7634815515610218, - "grad_norm": 2.1354434490203857, - "learning_rate": 7.433105758828631e-06, - "loss": 1.0869, - "step": 2330 - }, - { - "epoch": 1.7642384105960265, - "grad_norm": 2.135441303253174, - "learning_rate": 7.425348034368143e-06, - "loss": 1.1077, - "step": 2331 - }, - { - "epoch": 1.7649952696310311, - "grad_norm": 1.9634079933166504, - "learning_rate": 7.41759197013363e-06, - "loss": 1.0686, - "step": 2332 - }, - { - "epoch": 1.765752128666036, - "grad_norm": 2.202788829803467, - "learning_rate": 7.409837571124535e-06, - "loss": 1.0706, - "step": 2333 - }, - { - "epoch": 1.7665089877010407, - "grad_norm": 2.3422369956970215, - "learning_rate": 7.40208484233923e-06, - "loss": 1.1673, - "step": 2334 - }, - { - "epoch": 1.7672658467360454, - "grad_norm": 2.338772773742676, - "learning_rate": 7.394333788774995e-06, - "loss": 1.1037, - "step": 2335 - }, - { - "epoch": 1.7680227057710503, - "grad_norm": 2.2548608779907227, - "learning_rate": 7.386584415428051e-06, - "loss": 1.1583, - "step": 2336 - }, - { - "epoch": 1.768779564806055, - "grad_norm": 2.1475353240966797, - "learning_rate": 7.3788367272935235e-06, - "loss": 1.1232, - "step": 2337 - }, - { - "epoch": 1.7695364238410596, - "grad_norm": 2.1223628520965576, - "learning_rate": 7.37109072936545e-06, - "loss": 1.1164, - "step": 2338 - }, - { - "epoch": 1.7702932828760645, - "grad_norm": 2.0361294746398926, - "learning_rate": 7.363346426636786e-06, - "loss": 1.16, - "step": 2339 - }, - { - "epoch": 1.771050141911069, - "grad_norm": 2.1143364906311035, - "learning_rate": 7.355603824099389e-06, - "loss": 1.142, - "step": 2340 - }, - { - "epoch": 1.7718070009460738, - "grad_norm": 2.2214882373809814, - "learning_rate": 7.347862926744027e-06, - "loss": 1.1375, - "step": 2341 - }, - { - "epoch": 1.7725638599810787, - "grad_norm": 1.9182907342910767, - "learning_rate": 7.34012373956036e-06, - "loss": 1.1099, - "step": 2342 - }, - { - "epoch": 1.7733207190160831, - "grad_norm": 2.0895349979400635, - "learning_rate": 7.332386267536949e-06, - "loss": 1.1397, - "step": 2343 - }, - { - "epoch": 1.774077578051088, - "grad_norm": 2.078885793685913, - "learning_rate": 7.3246505156612554e-06, - "loss": 1.1296, - "step": 2344 - }, - { - "epoch": 1.7748344370860927, - "grad_norm": 2.180187702178955, - "learning_rate": 7.3169164889196235e-06, - "loss": 1.1458, - "step": 2345 - }, - { - "epoch": 1.7755912961210973, - "grad_norm": 2.3137030601501465, - "learning_rate": 7.309184192297289e-06, - "loss": 1.0713, - "step": 2346 - }, - { - "epoch": 1.7763481551561022, - "grad_norm": 2.0382871627807617, - "learning_rate": 7.3014536307783725e-06, - "loss": 1.1288, - "step": 2347 - }, - { - "epoch": 1.777105014191107, - "grad_norm": 2.004988670349121, - "learning_rate": 7.293724809345879e-06, - "loss": 1.1304, - 
"step": 2348 - }, - { - "epoch": 1.7778618732261116, - "grad_norm": 2.0899946689605713, - "learning_rate": 7.285997732981683e-06, - "loss": 1.1095, - "step": 2349 - }, - { - "epoch": 1.7786187322611164, - "grad_norm": 2.197770118713379, - "learning_rate": 7.2782724066665475e-06, - "loss": 1.1219, - "step": 2350 - }, - { - "epoch": 1.779375591296121, - "grad_norm": 1.9547758102416992, - "learning_rate": 7.270548835380095e-06, - "loss": 1.0707, - "step": 2351 - }, - { - "epoch": 1.7801324503311258, - "grad_norm": 2.0842347145080566, - "learning_rate": 7.262827024100821e-06, - "loss": 1.1485, - "step": 2352 - }, - { - "epoch": 1.7808893093661307, - "grad_norm": 2.189990520477295, - "learning_rate": 7.255106977806092e-06, - "loss": 1.1403, - "step": 2353 - }, - { - "epoch": 1.7816461684011353, - "grad_norm": 2.299306869506836, - "learning_rate": 7.247388701472129e-06, - "loss": 1.1001, - "step": 2354 - }, - { - "epoch": 1.78240302743614, - "grad_norm": 2.0084657669067383, - "learning_rate": 7.239672200074012e-06, - "loss": 1.0777, - "step": 2355 - }, - { - "epoch": 1.7831598864711449, - "grad_norm": 1.9171555042266846, - "learning_rate": 7.231957478585687e-06, - "loss": 1.1022, - "step": 2356 - }, - { - "epoch": 1.7839167455061493, - "grad_norm": 2.116420030593872, - "learning_rate": 7.224244541979941e-06, - "loss": 1.0945, - "step": 2357 - }, - { - "epoch": 1.7846736045411542, - "grad_norm": 2.250598669052124, - "learning_rate": 7.216533395228419e-06, - "loss": 1.1327, - "step": 2358 - }, - { - "epoch": 1.785430463576159, - "grad_norm": 2.0988495349884033, - "learning_rate": 7.208824043301604e-06, - "loss": 1.1452, - "step": 2359 - }, - { - "epoch": 1.7861873226111635, - "grad_norm": 1.9777265787124634, - "learning_rate": 7.201116491168829e-06, - "loss": 1.0838, - "step": 2360 - }, - { - "epoch": 1.7869441816461684, - "grad_norm": 2.1055500507354736, - "learning_rate": 7.19341074379827e-06, - "loss": 1.0996, - "step": 2361 - }, - { - "epoch": 1.787701040681173, - "grad_norm": 1.8813843727111816, - "learning_rate": 7.185706806156921e-06, - "loss": 1.1238, - "step": 2362 - }, - { - "epoch": 1.7884578997161777, - "grad_norm": 1.9652965068817139, - "learning_rate": 7.178004683210634e-06, - "loss": 1.1141, - "step": 2363 - }, - { - "epoch": 1.7892147587511826, - "grad_norm": 2.2869348526000977, - "learning_rate": 7.170304379924078e-06, - "loss": 1.1882, - "step": 2364 - }, - { - "epoch": 1.7899716177861873, - "grad_norm": 2.1046929359436035, - "learning_rate": 7.162605901260749e-06, - "loss": 1.0947, - "step": 2365 - }, - { - "epoch": 1.790728476821192, - "grad_norm": 2.0936052799224854, - "learning_rate": 7.1549092521829676e-06, - "loss": 1.1371, - "step": 2366 - }, - { - "epoch": 1.7914853358561968, - "grad_norm": 2.0121428966522217, - "learning_rate": 7.147214437651881e-06, - "loss": 1.102, - "step": 2367 - }, - { - "epoch": 1.7922421948912015, - "grad_norm": 2.144970178604126, - "learning_rate": 7.139521462627446e-06, - "loss": 1.1266, - "step": 2368 - }, - { - "epoch": 1.7929990539262062, - "grad_norm": 2.3722221851348877, - "learning_rate": 7.1318303320684356e-06, - "loss": 1.1499, - "step": 2369 - }, - { - "epoch": 1.793755912961211, - "grad_norm": 2.255847454071045, - "learning_rate": 7.124141050932441e-06, - "loss": 1.1243, - "step": 2370 - }, - { - "epoch": 1.7945127719962157, - "grad_norm": 2.1879565715789795, - "learning_rate": 7.116453624175847e-06, - "loss": 1.0995, - "step": 2371 - }, - { - "epoch": 1.7952696310312204, - "grad_norm": 2.267245292663574, - "learning_rate": 
7.108768056753863e-06, - "loss": 1.156, - "step": 2372 - }, - { - "epoch": 1.7960264900662253, - "grad_norm": 2.1807005405426025, - "learning_rate": 7.101084353620476e-06, - "loss": 1.1588, - "step": 2373 - }, - { - "epoch": 1.79678334910123, - "grad_norm": 2.2159693241119385, - "learning_rate": 7.0934025197284924e-06, - "loss": 1.0647, - "step": 2374 - }, - { - "epoch": 1.7975402081362346, - "grad_norm": 2.1058151721954346, - "learning_rate": 7.085722560029507e-06, - "loss": 1.1166, - "step": 2375 - }, - { - "epoch": 1.7982970671712395, - "grad_norm": 2.202956438064575, - "learning_rate": 7.0780444794738945e-06, - "loss": 1.1524, - "step": 2376 - }, - { - "epoch": 1.799053926206244, - "grad_norm": 2.15413761138916, - "learning_rate": 7.070368283010836e-06, - "loss": 1.1331, - "step": 2377 - }, - { - "epoch": 1.7998107852412488, - "grad_norm": 1.968179702758789, - "learning_rate": 7.062693975588291e-06, - "loss": 1.0785, - "step": 2378 - }, - { - "epoch": 1.8005676442762537, - "grad_norm": 2.2872471809387207, - "learning_rate": 7.0550215621529965e-06, - "loss": 1.1364, - "step": 2379 - }, - { - "epoch": 1.8013245033112582, - "grad_norm": 2.0598936080932617, - "learning_rate": 7.047351047650476e-06, - "loss": 1.1238, - "step": 2380 - }, - { - "epoch": 1.802081362346263, - "grad_norm": 2.055774688720703, - "learning_rate": 7.039682437025028e-06, - "loss": 1.1336, - "step": 2381 - }, - { - "epoch": 1.8028382213812677, - "grad_norm": 2.1142072677612305, - "learning_rate": 7.032015735219719e-06, - "loss": 1.1216, - "step": 2382 - }, - { - "epoch": 1.8035950804162724, - "grad_norm": 2.067873001098633, - "learning_rate": 7.024350947176391e-06, - "loss": 1.1253, - "step": 2383 - }, - { - "epoch": 1.8043519394512773, - "grad_norm": 1.906582236289978, - "learning_rate": 7.016688077835645e-06, - "loss": 1.1002, - "step": 2384 - }, - { - "epoch": 1.805108798486282, - "grad_norm": 2.005889892578125, - "learning_rate": 7.009027132136853e-06, - "loss": 1.135, - "step": 2385 - }, - { - "epoch": 1.8058656575212866, - "grad_norm": 1.9194884300231934, - "learning_rate": 7.001368115018144e-06, - "loss": 1.0872, - "step": 2386 - }, - { - "epoch": 1.8066225165562915, - "grad_norm": 2.044262647628784, - "learning_rate": 6.993711031416402e-06, - "loss": 1.0973, - "step": 2387 - }, - { - "epoch": 1.8073793755912961, - "grad_norm": 2.0718541145324707, - "learning_rate": 6.986055886267265e-06, - "loss": 1.1224, - "step": 2388 - }, - { - "epoch": 1.8081362346263008, - "grad_norm": 2.132376194000244, - "learning_rate": 6.97840268450513e-06, - "loss": 1.083, - "step": 2389 - }, - { - "epoch": 1.8088930936613057, - "grad_norm": 2.1769330501556396, - "learning_rate": 6.970751431063124e-06, - "loss": 1.148, - "step": 2390 - }, - { - "epoch": 1.8096499526963103, - "grad_norm": 2.1449358463287354, - "learning_rate": 6.963102130873134e-06, - "loss": 1.0967, - "step": 2391 - }, - { - "epoch": 1.810406811731315, - "grad_norm": 1.9859085083007812, - "learning_rate": 6.955454788865785e-06, - "loss": 1.1101, - "step": 2392 - }, - { - "epoch": 1.81116367076632, - "grad_norm": 2.211151361465454, - "learning_rate": 6.947809409970431e-06, - "loss": 1.114, - "step": 2393 - }, - { - "epoch": 1.8119205298013243, - "grad_norm": 2.0564661026000977, - "learning_rate": 6.940165999115169e-06, - "loss": 1.0703, - "step": 2394 - }, - { - "epoch": 1.8126773888363292, - "grad_norm": 2.2020647525787354, - "learning_rate": 6.932524561226824e-06, - "loss": 1.0784, - "step": 2395 - }, - { - "epoch": 1.8134342478713341, - "grad_norm": 
2.0232954025268555, - "learning_rate": 6.924885101230955e-06, - "loss": 1.1231, - "step": 2396 - }, - { - "epoch": 1.8141911069063386, - "grad_norm": 2.0655837059020996, - "learning_rate": 6.917247624051836e-06, - "loss": 1.102, - "step": 2397 - }, - { - "epoch": 1.8149479659413434, - "grad_norm": 2.0320346355438232, - "learning_rate": 6.90961213461247e-06, - "loss": 1.119, - "step": 2398 - }, - { - "epoch": 1.815704824976348, - "grad_norm": 2.415329694747925, - "learning_rate": 6.901978637834579e-06, - "loss": 1.1015, - "step": 2399 - }, - { - "epoch": 1.8164616840113528, - "grad_norm": 1.962516188621521, - "learning_rate": 6.894347138638595e-06, - "loss": 1.1063, - "step": 2400 - }, - { - "epoch": 1.8172185430463577, - "grad_norm": 2.263796329498291, - "learning_rate": 6.886717641943668e-06, - "loss": 1.0946, - "step": 2401 - }, - { - "epoch": 1.8179754020813623, - "grad_norm": 1.8907090425491333, - "learning_rate": 6.879090152667655e-06, - "loss": 1.0842, - "step": 2402 - }, - { - "epoch": 1.818732261116367, - "grad_norm": 2.3313028812408447, - "learning_rate": 6.871464675727122e-06, - "loss": 1.0972, - "step": 2403 - }, - { - "epoch": 1.8194891201513719, - "grad_norm": 2.123699903488159, - "learning_rate": 6.8638412160373294e-06, - "loss": 1.0953, - "step": 2404 - }, - { - "epoch": 1.8202459791863765, - "grad_norm": 2.1058695316314697, - "learning_rate": 6.856219778512248e-06, - "loss": 1.0983, - "step": 2405 - }, - { - "epoch": 1.8210028382213812, - "grad_norm": 2.0354390144348145, - "learning_rate": 6.8486003680645384e-06, - "loss": 1.1184, - "step": 2406 - }, - { - "epoch": 1.821759697256386, - "grad_norm": 2.448774814605713, - "learning_rate": 6.840982989605554e-06, - "loss": 1.1902, - "step": 2407 - }, - { - "epoch": 1.8225165562913908, - "grad_norm": 2.067413330078125, - "learning_rate": 6.833367648045347e-06, - "loss": 1.0844, - "step": 2408 - }, - { - "epoch": 1.8232734153263954, - "grad_norm": 1.8351151943206787, - "learning_rate": 6.825754348292647e-06, - "loss": 1.0751, - "step": 2409 - }, - { - "epoch": 1.8240302743614003, - "grad_norm": 2.036219835281372, - "learning_rate": 6.8181430952548664e-06, - "loss": 1.1118, - "step": 2410 - }, - { - "epoch": 1.824787133396405, - "grad_norm": 2.2903716564178467, - "learning_rate": 6.810533893838111e-06, - "loss": 1.1085, - "step": 2411 - }, - { - "epoch": 1.8255439924314096, - "grad_norm": 2.1487245559692383, - "learning_rate": 6.802926748947149e-06, - "loss": 1.0766, - "step": 2412 - }, - { - "epoch": 1.8263008514664145, - "grad_norm": 2.073429822921753, - "learning_rate": 6.795321665485434e-06, - "loss": 1.1056, - "step": 2413 - }, - { - "epoch": 1.827057710501419, - "grad_norm": 2.1071133613586426, - "learning_rate": 6.7877186483550865e-06, - "loss": 1.1688, - "step": 2414 - }, - { - "epoch": 1.8278145695364238, - "grad_norm": 2.153792381286621, - "learning_rate": 6.780117702456892e-06, - "loss": 1.1281, - "step": 2415 - }, - { - "epoch": 1.8285714285714287, - "grad_norm": 2.046393632888794, - "learning_rate": 6.772518832690312e-06, - "loss": 1.1413, - "step": 2416 - }, - { - "epoch": 1.8293282876064332, - "grad_norm": 2.0445821285247803, - "learning_rate": 6.764922043953452e-06, - "loss": 1.0761, - "step": 2417 - }, - { - "epoch": 1.830085146641438, - "grad_norm": 2.1296608448028564, - "learning_rate": 6.757327341143093e-06, - "loss": 1.1077, - "step": 2418 - }, - { - "epoch": 1.8308420056764427, - "grad_norm": 2.218290328979492, - "learning_rate": 6.749734729154663e-06, - "loss": 1.0869, - "step": 2419 - }, - { - "epoch": 
1.8315988647114474, - "grad_norm": 2.161032199859619, - "learning_rate": 6.742144212882244e-06, - "loss": 1.1116, - "step": 2420 - }, - { - "epoch": 1.8323557237464523, - "grad_norm": 1.979115605354309, - "learning_rate": 6.734555797218567e-06, - "loss": 1.1321, - "step": 2421 - }, - { - "epoch": 1.833112582781457, - "grad_norm": 2.016322135925293, - "learning_rate": 6.726969487055008e-06, - "loss": 1.1265, - "step": 2422 - }, - { - "epoch": 1.8338694418164616, - "grad_norm": 1.943589448928833, - "learning_rate": 6.719385287281589e-06, - "loss": 1.079, - "step": 2423 - }, - { - "epoch": 1.8346263008514665, - "grad_norm": 2.0779478549957275, - "learning_rate": 6.711803202786965e-06, - "loss": 1.161, - "step": 2424 - }, - { - "epoch": 1.8353831598864712, - "grad_norm": 1.9313197135925293, - "learning_rate": 6.7042232384584396e-06, - "loss": 1.114, - "step": 2425 - }, - { - "epoch": 1.8361400189214758, - "grad_norm": 2.177368640899658, - "learning_rate": 6.6966453991819355e-06, - "loss": 1.1141, - "step": 2426 - }, - { - "epoch": 1.8368968779564807, - "grad_norm": 2.155545234680176, - "learning_rate": 6.689069689842015e-06, - "loss": 1.1058, - "step": 2427 - }, - { - "epoch": 1.8376537369914854, - "grad_norm": 2.1615564823150635, - "learning_rate": 6.681496115321863e-06, - "loss": 1.1445, - "step": 2428 - }, - { - "epoch": 1.83841059602649, - "grad_norm": 2.2454423904418945, - "learning_rate": 6.6739246805032895e-06, - "loss": 1.1575, - "step": 2429 - }, - { - "epoch": 1.839167455061495, - "grad_norm": 1.9341751337051392, - "learning_rate": 6.6663553902667345e-06, - "loss": 1.0993, - "step": 2430 - }, - { - "epoch": 1.8399243140964994, - "grad_norm": 2.307654857635498, - "learning_rate": 6.658788249491236e-06, - "loss": 1.0903, - "step": 2431 - }, - { - "epoch": 1.8406811731315043, - "grad_norm": 2.172126293182373, - "learning_rate": 6.651223263054462e-06, - "loss": 1.1384, - "step": 2432 - }, - { - "epoch": 1.8414380321665091, - "grad_norm": 2.061699151992798, - "learning_rate": 6.64366043583269e-06, - "loss": 1.1066, - "step": 2433 - }, - { - "epoch": 1.8421948912015136, - "grad_norm": 2.0565085411071777, - "learning_rate": 6.636099772700797e-06, - "loss": 1.1265, - "step": 2434 - }, - { - "epoch": 1.8429517502365185, - "grad_norm": 2.3713178634643555, - "learning_rate": 6.628541278532276e-06, - "loss": 1.1067, - "step": 2435 - }, - { - "epoch": 1.8437086092715231, - "grad_norm": 2.0300235748291016, - "learning_rate": 6.620984958199217e-06, - "loss": 1.1053, - "step": 2436 - }, - { - "epoch": 1.8444654683065278, - "grad_norm": 1.8853594064712524, - "learning_rate": 6.613430816572308e-06, - "loss": 1.1375, - "step": 2437 - }, - { - "epoch": 1.8452223273415327, - "grad_norm": 2.140911102294922, - "learning_rate": 6.605878858520832e-06, - "loss": 1.1372, - "step": 2438 - }, - { - "epoch": 1.8459791863765374, - "grad_norm": 2.0533270835876465, - "learning_rate": 6.598329088912666e-06, - "loss": 1.1054, - "step": 2439 - }, - { - "epoch": 1.846736045411542, - "grad_norm": 2.0813000202178955, - "learning_rate": 6.59078151261428e-06, - "loss": 1.0635, - "step": 2440 - }, - { - "epoch": 1.847492904446547, - "grad_norm": 2.0938546657562256, - "learning_rate": 6.5832361344907225e-06, - "loss": 1.1368, - "step": 2441 - }, - { - "epoch": 1.8482497634815516, - "grad_norm": 2.1274354457855225, - "learning_rate": 6.57569295940563e-06, - "loss": 1.1446, - "step": 2442 - }, - { - "epoch": 1.8490066225165562, - "grad_norm": 2.2737364768981934, - "learning_rate": 6.5681519922212175e-06, - "loss": 1.1007, - 
"step": 2443 - }, - { - "epoch": 1.8497634815515611, - "grad_norm": 2.0562212467193604, - "learning_rate": 6.560613237798282e-06, - "loss": 1.1033, - "step": 2444 - }, - { - "epoch": 1.8505203405865658, - "grad_norm": 2.1894006729125977, - "learning_rate": 6.553076700996186e-06, - "loss": 1.1733, - "step": 2445 - }, - { - "epoch": 1.8512771996215704, - "grad_norm": 2.1526927947998047, - "learning_rate": 6.545542386672864e-06, - "loss": 1.1254, - "step": 2446 - }, - { - "epoch": 1.8520340586565753, - "grad_norm": 2.335092306137085, - "learning_rate": 6.538010299684827e-06, - "loss": 1.089, - "step": 2447 - }, - { - "epoch": 1.85279091769158, - "grad_norm": 2.1147849559783936, - "learning_rate": 6.530480444887135e-06, - "loss": 1.1075, - "step": 2448 - }, - { - "epoch": 1.8535477767265847, - "grad_norm": 1.9990819692611694, - "learning_rate": 6.522952827133424e-06, - "loss": 1.1069, - "step": 2449 - }, - { - "epoch": 1.8543046357615895, - "grad_norm": 2.2554056644439697, - "learning_rate": 6.515427451275879e-06, - "loss": 1.1205, - "step": 2450 - }, - { - "epoch": 1.855061494796594, - "grad_norm": 2.143373489379883, - "learning_rate": 6.507904322165242e-06, - "loss": 1.1, - "step": 2451 - }, - { - "epoch": 1.8558183538315989, - "grad_norm": 2.145324468612671, - "learning_rate": 6.500383444650808e-06, - "loss": 1.124, - "step": 2452 - }, - { - "epoch": 1.8565752128666035, - "grad_norm": 2.0681822299957275, - "learning_rate": 6.492864823580418e-06, - "loss": 1.1404, - "step": 2453 - }, - { - "epoch": 1.8573320719016082, - "grad_norm": 2.0816290378570557, - "learning_rate": 6.485348463800467e-06, - "loss": 1.121, - "step": 2454 - }, - { - "epoch": 1.858088930936613, - "grad_norm": 2.210402488708496, - "learning_rate": 6.477834370155879e-06, - "loss": 1.081, - "step": 2455 - }, - { - "epoch": 1.8588457899716178, - "grad_norm": 2.258357286453247, - "learning_rate": 6.4703225474901266e-06, - "loss": 1.1221, - "step": 2456 - }, - { - "epoch": 1.8596026490066224, - "grad_norm": 2.336432456970215, - "learning_rate": 6.462813000645216e-06, - "loss": 1.1288, - "step": 2457 - }, - { - "epoch": 1.8603595080416273, - "grad_norm": 2.224451780319214, - "learning_rate": 6.4553057344616885e-06, - "loss": 1.1213, - "step": 2458 - }, - { - "epoch": 1.861116367076632, - "grad_norm": 2.012571096420288, - "learning_rate": 6.447800753778608e-06, - "loss": 1.1079, - "step": 2459 - }, - { - "epoch": 1.8618732261116366, - "grad_norm": 2.0077013969421387, - "learning_rate": 6.440298063433578e-06, - "loss": 1.1139, - "step": 2460 - }, - { - "epoch": 1.8626300851466415, - "grad_norm": 2.0572779178619385, - "learning_rate": 6.432797668262713e-06, - "loss": 1.1225, - "step": 2461 - }, - { - "epoch": 1.8633869441816462, - "grad_norm": 2.052415609359741, - "learning_rate": 6.425299573100653e-06, - "loss": 1.1232, - "step": 2462 - }, - { - "epoch": 1.8641438032166509, - "grad_norm": 2.1070804595947266, - "learning_rate": 6.41780378278056e-06, - "loss": 1.1425, - "step": 2463 - }, - { - "epoch": 1.8649006622516557, - "grad_norm": 2.1018309593200684, - "learning_rate": 6.410310302134102e-06, - "loss": 1.124, - "step": 2464 - }, - { - "epoch": 1.8656575212866604, - "grad_norm": 2.104137897491455, - "learning_rate": 6.4028191359914635e-06, - "loss": 1.1366, - "step": 2465 - }, - { - "epoch": 1.866414380321665, - "grad_norm": 2.196840763092041, - "learning_rate": 6.395330289181339e-06, - "loss": 1.1138, - "step": 2466 - }, - { - "epoch": 1.86717123935667, - "grad_norm": 2.0204899311065674, - "learning_rate": 6.38784376653092e-06, 
- "loss": 1.1571, - "step": 2467 - }, - { - "epoch": 1.8679280983916744, - "grad_norm": 2.1718480587005615, - "learning_rate": 6.380359572865909e-06, - "loss": 1.1265, - "step": 2468 - }, - { - "epoch": 1.8686849574266793, - "grad_norm": 2.2680718898773193, - "learning_rate": 6.372877713010501e-06, - "loss": 1.1218, - "step": 2469 - }, - { - "epoch": 1.8694418164616842, - "grad_norm": 1.9217084646224976, - "learning_rate": 6.365398191787388e-06, - "loss": 1.0846, - "step": 2470 - }, - { - "epoch": 1.8701986754966886, - "grad_norm": 2.0585711002349854, - "learning_rate": 6.35792101401776e-06, - "loss": 1.1274, - "step": 2471 - }, - { - "epoch": 1.8709555345316935, - "grad_norm": 1.989283800125122, - "learning_rate": 6.350446184521285e-06, - "loss": 1.095, - "step": 2472 - }, - { - "epoch": 1.8717123935666982, - "grad_norm": 1.886738657951355, - "learning_rate": 6.3429737081161265e-06, - "loss": 1.1196, - "step": 2473 - }, - { - "epoch": 1.8724692526017028, - "grad_norm": 1.9688234329223633, - "learning_rate": 6.335503589618933e-06, - "loss": 1.143, - "step": 2474 - }, - { - "epoch": 1.8732261116367077, - "grad_norm": 2.37060284614563, - "learning_rate": 6.328035833844823e-06, - "loss": 1.1088, - "step": 2475 - }, - { - "epoch": 1.8739829706717124, - "grad_norm": 2.3199589252471924, - "learning_rate": 6.320570445607399e-06, - "loss": 1.1072, - "step": 2476 - }, - { - "epoch": 1.874739829706717, - "grad_norm": 1.914215087890625, - "learning_rate": 6.313107429718741e-06, - "loss": 1.1222, - "step": 2477 - }, - { - "epoch": 1.875496688741722, - "grad_norm": 2.3843131065368652, - "learning_rate": 6.305646790989391e-06, - "loss": 1.1509, - "step": 2478 - }, - { - "epoch": 1.8762535477767266, - "grad_norm": 2.1501553058624268, - "learning_rate": 6.298188534228365e-06, - "loss": 1.0925, - "step": 2479 - }, - { - "epoch": 1.8770104068117313, - "grad_norm": 2.103590965270996, - "learning_rate": 6.290732664243141e-06, - "loss": 1.1068, - "step": 2480 - }, - { - "epoch": 1.8777672658467361, - "grad_norm": 2.1373817920684814, - "learning_rate": 6.283279185839658e-06, - "loss": 1.1228, - "step": 2481 - }, - { - "epoch": 1.8785241248817408, - "grad_norm": 1.9448984861373901, - "learning_rate": 6.275828103822317e-06, - "loss": 1.1138, - "step": 2482 - }, - { - "epoch": 1.8792809839167455, - "grad_norm": 2.133575916290283, - "learning_rate": 6.268379422993969e-06, - "loss": 1.137, - "step": 2483 - }, - { - "epoch": 1.8800378429517504, - "grad_norm": 2.0521798133850098, - "learning_rate": 6.26093314815592e-06, - "loss": 1.1077, - "step": 2484 - }, - { - "epoch": 1.8807947019867548, - "grad_norm": 2.154632091522217, - "learning_rate": 6.253489284107929e-06, - "loss": 1.0963, - "step": 2485 - }, - { - "epoch": 1.8815515610217597, - "grad_norm": 2.0606281757354736, - "learning_rate": 6.246047835648191e-06, - "loss": 1.1233, - "step": 2486 - }, - { - "epoch": 1.8823084200567646, - "grad_norm": 1.9377020597457886, - "learning_rate": 6.238608807573355e-06, - "loss": 1.128, - "step": 2487 - }, - { - "epoch": 1.883065279091769, - "grad_norm": 2.133552074432373, - "learning_rate": 6.231172204678507e-06, - "loss": 1.0872, - "step": 2488 - }, - { - "epoch": 1.883822138126774, - "grad_norm": 2.1637847423553467, - "learning_rate": 6.2237380317571626e-06, - "loss": 1.1051, - "step": 2489 - }, - { - "epoch": 1.8845789971617786, - "grad_norm": 2.0870983600616455, - "learning_rate": 6.216306293601277e-06, - "loss": 1.1296, - "step": 2490 - }, - { - "epoch": 1.8853358561967832, - "grad_norm": 2.129365921020508, - 
"learning_rate": 6.20887699500124e-06, - "loss": 1.1125, - "step": 2491 - }, - { - "epoch": 1.8860927152317881, - "grad_norm": 1.887802004814148, - "learning_rate": 6.20145014074586e-06, - "loss": 1.0531, - "step": 2492 - }, - { - "epoch": 1.8868495742667928, - "grad_norm": 2.163595199584961, - "learning_rate": 6.194025735622371e-06, - "loss": 1.0727, - "step": 2493 - }, - { - "epoch": 1.8876064333017974, - "grad_norm": 1.8616597652435303, - "learning_rate": 6.186603784416441e-06, - "loss": 1.0561, - "step": 2494 - }, - { - "epoch": 1.8883632923368023, - "grad_norm": 2.1504313945770264, - "learning_rate": 6.179184291912138e-06, - "loss": 1.1093, - "step": 2495 - }, - { - "epoch": 1.889120151371807, - "grad_norm": 1.997025728225708, - "learning_rate": 6.171767262891958e-06, - "loss": 1.0991, - "step": 2496 - }, - { - "epoch": 1.8898770104068117, - "grad_norm": 2.141857147216797, - "learning_rate": 6.164352702136799e-06, - "loss": 1.0897, - "step": 2497 - }, - { - "epoch": 1.8906338694418166, - "grad_norm": 1.9679754972457886, - "learning_rate": 6.15694061442598e-06, - "loss": 1.0971, - "step": 2498 - }, - { - "epoch": 1.8913907284768212, - "grad_norm": 2.048257350921631, - "learning_rate": 6.149531004537222e-06, - "loss": 1.1139, - "step": 2499 - }, - { - "epoch": 1.8921475875118259, - "grad_norm": 2.4383885860443115, - "learning_rate": 6.1421238772466375e-06, - "loss": 1.1028, - "step": 2500 - }, - { - "epoch": 1.8929044465468308, - "grad_norm": 2.0352723598480225, - "learning_rate": 6.134719237328751e-06, - "loss": 1.0957, - "step": 2501 - }, - { - "epoch": 1.8936613055818354, - "grad_norm": 2.1713624000549316, - "learning_rate": 6.127317089556489e-06, - "loss": 1.0726, - "step": 2502 - }, - { - "epoch": 1.89441816461684, - "grad_norm": 2.2224864959716797, - "learning_rate": 6.119917438701151e-06, - "loss": 1.0919, - "step": 2503 - }, - { - "epoch": 1.895175023651845, - "grad_norm": 2.1596179008483887, - "learning_rate": 6.112520289532445e-06, - "loss": 1.1273, - "step": 2504 - }, - { - "epoch": 1.8959318826868494, - "grad_norm": 2.018328905105591, - "learning_rate": 6.105125646818463e-06, - "loss": 1.1354, - "step": 2505 - }, - { - "epoch": 1.8966887417218543, - "grad_norm": 2.1755290031433105, - "learning_rate": 6.097733515325671e-06, - "loss": 1.1037, - "step": 2506 - }, - { - "epoch": 1.8974456007568592, - "grad_norm": 2.172973871231079, - "learning_rate": 6.090343899818931e-06, - "loss": 1.0656, - "step": 2507 - }, - { - "epoch": 1.8982024597918636, - "grad_norm": 2.193934679031372, - "learning_rate": 6.0829568050614725e-06, - "loss": 1.1252, - "step": 2508 - }, - { - "epoch": 1.8989593188268685, - "grad_norm": 2.3651788234710693, - "learning_rate": 6.075572235814909e-06, - "loss": 1.1242, - "step": 2509 - }, - { - "epoch": 1.8997161778618732, - "grad_norm": 2.107897996902466, - "learning_rate": 6.0681901968392184e-06, - "loss": 1.0937, - "step": 2510 - }, - { - "epoch": 1.9004730368968779, - "grad_norm": 2.57551908493042, - "learning_rate": 6.060810692892748e-06, - "loss": 1.129, - "step": 2511 - }, - { - "epoch": 1.9012298959318827, - "grad_norm": 2.0832760334014893, - "learning_rate": 6.053433728732217e-06, - "loss": 1.138, - "step": 2512 - }, - { - "epoch": 1.9019867549668874, - "grad_norm": 1.9609954357147217, - "learning_rate": 6.046059309112703e-06, - "loss": 1.1404, - "step": 2513 - }, - { - "epoch": 1.902743614001892, - "grad_norm": 2.1895411014556885, - "learning_rate": 6.038687438787642e-06, - "loss": 1.1378, - "step": 2514 - }, - { - "epoch": 1.903500473036897, - 
"grad_norm": 2.058955430984497, - "learning_rate": 6.031318122508833e-06, - "loss": 1.117, - "step": 2515 - }, - { - "epoch": 1.9042573320719016, - "grad_norm": 2.1496293544769287, - "learning_rate": 6.023951365026426e-06, - "loss": 1.1115, - "step": 2516 - }, - { - "epoch": 1.9050141911069063, - "grad_norm": 2.147587776184082, - "learning_rate": 6.016587171088913e-06, - "loss": 1.1419, - "step": 2517 - }, - { - "epoch": 1.9057710501419112, - "grad_norm": 2.470024824142456, - "learning_rate": 6.009225545443148e-06, - "loss": 1.1518, - "step": 2518 - }, - { - "epoch": 1.9065279091769158, - "grad_norm": 2.0301973819732666, - "learning_rate": 6.001866492834322e-06, - "loss": 1.0815, - "step": 2519 - }, - { - "epoch": 1.9072847682119205, - "grad_norm": 2.2255492210388184, - "learning_rate": 5.994510018005964e-06, - "loss": 1.1555, - "step": 2520 - }, - { - "epoch": 1.9080416272469254, - "grad_norm": 2.101928472518921, - "learning_rate": 5.987156125699951e-06, - "loss": 1.1251, - "step": 2521 - }, - { - "epoch": 1.9087984862819298, - "grad_norm": 2.0817983150482178, - "learning_rate": 5.979804820656483e-06, - "loss": 1.1233, - "step": 2522 - }, - { - "epoch": 1.9095553453169347, - "grad_norm": 2.082615375518799, - "learning_rate": 5.972456107614105e-06, - "loss": 1.1198, - "step": 2523 - }, - { - "epoch": 1.9103122043519396, - "grad_norm": 2.036180257797241, - "learning_rate": 5.965109991309686e-06, - "loss": 1.1056, - "step": 2524 - }, - { - "epoch": 1.911069063386944, - "grad_norm": 2.358384847640991, - "learning_rate": 5.9577664764784126e-06, - "loss": 1.1125, - "step": 2525 - }, - { - "epoch": 1.911825922421949, - "grad_norm": 2.097381830215454, - "learning_rate": 5.950425567853813e-06, - "loss": 1.1394, - "step": 2526 - }, - { - "epoch": 1.9125827814569536, - "grad_norm": 2.0445775985717773, - "learning_rate": 5.943087270167718e-06, - "loss": 1.1276, - "step": 2527 - }, - { - "epoch": 1.9133396404919583, - "grad_norm": 2.2490360736846924, - "learning_rate": 5.935751588150282e-06, - "loss": 1.0963, - "step": 2528 - }, - { - "epoch": 1.9140964995269631, - "grad_norm": 2.212881088256836, - "learning_rate": 5.928418526529981e-06, - "loss": 1.0829, - "step": 2529 - }, - { - "epoch": 1.9148533585619678, - "grad_norm": 1.9197094440460205, - "learning_rate": 5.921088090033585e-06, - "loss": 1.0947, - "step": 2530 - }, - { - "epoch": 1.9156102175969725, - "grad_norm": 2.0829176902770996, - "learning_rate": 5.913760283386186e-06, - "loss": 1.1466, - "step": 2531 - }, - { - "epoch": 1.9163670766319774, - "grad_norm": 2.326220750808716, - "learning_rate": 5.906435111311179e-06, - "loss": 1.131, - "step": 2532 - }, - { - "epoch": 1.917123935666982, - "grad_norm": 2.2894301414489746, - "learning_rate": 5.899112578530255e-06, - "loss": 1.1062, - "step": 2533 - }, - { - "epoch": 1.9178807947019867, - "grad_norm": 2.134059190750122, - "learning_rate": 5.891792689763407e-06, - "loss": 1.1116, - "step": 2534 - }, - { - "epoch": 1.9186376537369916, - "grad_norm": 2.1360747814178467, - "learning_rate": 5.884475449728925e-06, - "loss": 1.15, - "step": 2535 - }, - { - "epoch": 1.9193945127719962, - "grad_norm": 2.3759396076202393, - "learning_rate": 5.877160863143391e-06, - "loss": 1.0696, - "step": 2536 - }, - { - "epoch": 1.920151371807001, - "grad_norm": 2.216271162033081, - "learning_rate": 5.869848934721671e-06, - "loss": 1.166, - "step": 2537 - }, - { - "epoch": 1.9209082308420058, - "grad_norm": 2.0322463512420654, - "learning_rate": 5.86253966917693e-06, - "loss": 1.1031, - "step": 2538 - }, - { - 
"epoch": 1.9216650898770105, - "grad_norm": 1.9586721658706665, - "learning_rate": 5.855233071220603e-06, - "loss": 1.1062, - "step": 2539 - }, - { - "epoch": 1.9224219489120151, - "grad_norm": 2.202064037322998, - "learning_rate": 5.8479291455624186e-06, - "loss": 1.1295, - "step": 2540 - }, - { - "epoch": 1.92317880794702, - "grad_norm": 2.291038751602173, - "learning_rate": 5.840627896910365e-06, - "loss": 1.157, - "step": 2541 - }, - { - "epoch": 1.9239356669820245, - "grad_norm": 1.989047884941101, - "learning_rate": 5.833329329970726e-06, - "loss": 1.1506, - "step": 2542 - }, - { - "epoch": 1.9246925260170293, - "grad_norm": 1.9984663724899292, - "learning_rate": 5.82603344944804e-06, - "loss": 1.106, - "step": 2543 - }, - { - "epoch": 1.9254493850520342, - "grad_norm": 2.3392581939697266, - "learning_rate": 5.818740260045123e-06, - "loss": 1.1819, - "step": 2544 - }, - { - "epoch": 1.9262062440870387, - "grad_norm": 2.148768663406372, - "learning_rate": 5.811449766463058e-06, - "loss": 1.1439, - "step": 2545 - }, - { - "epoch": 1.9269631031220436, - "grad_norm": 2.033663511276245, - "learning_rate": 5.804161973401175e-06, - "loss": 1.1111, - "step": 2546 - }, - { - "epoch": 1.9277199621570482, - "grad_norm": 2.2173452377319336, - "learning_rate": 5.796876885557084e-06, - "loss": 1.0752, - "step": 2547 - }, - { - "epoch": 1.9284768211920529, - "grad_norm": 2.039340019226074, - "learning_rate": 5.78959450762664e-06, - "loss": 1.0878, - "step": 2548 - }, - { - "epoch": 1.9292336802270578, - "grad_norm": 2.22098445892334, - "learning_rate": 5.782314844303949e-06, - "loss": 1.1109, - "step": 2549 - }, - { - "epoch": 1.9299905392620624, - "grad_norm": 1.9632805585861206, - "learning_rate": 5.775037900281372e-06, - "loss": 1.0981, - "step": 2550 - }, - { - "epoch": 1.930747398297067, - "grad_norm": 2.195981025695801, - "learning_rate": 5.767763680249521e-06, - "loss": 1.0659, - "step": 2551 - }, - { - "epoch": 1.931504257332072, - "grad_norm": 2.3889381885528564, - "learning_rate": 5.760492188897241e-06, - "loss": 1.1027, - "step": 2552 - }, - { - "epoch": 1.9322611163670766, - "grad_norm": 2.212132692337036, - "learning_rate": 5.753223430911625e-06, - "loss": 1.1435, - "step": 2553 - }, - { - "epoch": 1.9330179754020813, - "grad_norm": 2.109678268432617, - "learning_rate": 5.7459574109780105e-06, - "loss": 1.1226, - "step": 2554 - }, - { - "epoch": 1.9337748344370862, - "grad_norm": 2.1192758083343506, - "learning_rate": 5.738694133779954e-06, - "loss": 1.158, - "step": 2555 - }, - { - "epoch": 1.9345316934720909, - "grad_norm": 2.249246597290039, - "learning_rate": 5.7314336039992565e-06, - "loss": 1.1342, - "step": 2556 - }, - { - "epoch": 1.9352885525070955, - "grad_norm": 2.1159563064575195, - "learning_rate": 5.7241758263159504e-06, - "loss": 1.0984, - "step": 2557 - }, - { - "epoch": 1.9360454115421004, - "grad_norm": 2.092686891555786, - "learning_rate": 5.7169208054082794e-06, - "loss": 1.1462, - "step": 2558 - }, - { - "epoch": 1.9368022705771049, - "grad_norm": 2.201853036880493, - "learning_rate": 5.7096685459527235e-06, - "loss": 1.1121, - "step": 2559 - }, - { - "epoch": 1.9375591296121097, - "grad_norm": 2.29725980758667, - "learning_rate": 5.702419052623982e-06, - "loss": 1.17, - "step": 2560 - }, - { - "epoch": 1.9383159886471146, - "grad_norm": 2.636181592941284, - "learning_rate": 5.695172330094961e-06, - "loss": 1.1398, - "step": 2561 - }, - { - "epoch": 1.939072847682119, - "grad_norm": 2.0273807048797607, - "learning_rate": 5.687928383036795e-06, - "loss": 1.0939, - 
"step": 2562 - }, - { - "epoch": 1.939829706717124, - "grad_norm": 2.2744505405426025, - "learning_rate": 5.680687216118814e-06, - "loss": 1.1233, - "step": 2563 - }, - { - "epoch": 1.9405865657521286, - "grad_norm": 1.976132869720459, - "learning_rate": 5.6734488340085665e-06, - "loss": 1.1573, - "step": 2564 - }, - { - "epoch": 1.9413434247871333, - "grad_norm": 5.837771415710449, - "learning_rate": 5.666213241371809e-06, - "loss": 1.1299, - "step": 2565 - }, - { - "epoch": 1.9421002838221382, - "grad_norm": 2.1729319095611572, - "learning_rate": 5.658980442872484e-06, - "loss": 1.1399, - "step": 2566 - }, - { - "epoch": 1.9428571428571428, - "grad_norm": 2.049954652786255, - "learning_rate": 5.651750443172749e-06, - "loss": 1.1012, - "step": 2567 - }, - { - "epoch": 1.9436140018921475, - "grad_norm": 2.170069694519043, - "learning_rate": 5.644523246932951e-06, - "loss": 1.1276, - "step": 2568 - }, - { - "epoch": 1.9443708609271524, - "grad_norm": 2.143918514251709, - "learning_rate": 5.637298858811633e-06, - "loss": 1.1127, - "step": 2569 - }, - { - "epoch": 1.945127719962157, - "grad_norm": 2.114530324935913, - "learning_rate": 5.6300772834655195e-06, - "loss": 1.0949, - "step": 2570 - }, - { - "epoch": 1.9458845789971617, - "grad_norm": 2.067688226699829, - "learning_rate": 5.6228585255495315e-06, - "loss": 1.1402, - "step": 2571 - }, - { - "epoch": 1.9466414380321666, - "grad_norm": 2.254387855529785, - "learning_rate": 5.615642589716773e-06, - "loss": 1.197, - "step": 2572 - }, - { - "epoch": 1.9473982970671713, - "grad_norm": 2.0530171394348145, - "learning_rate": 5.608429480618519e-06, - "loss": 1.0985, - "step": 2573 - }, - { - "epoch": 1.948155156102176, - "grad_norm": 1.9690866470336914, - "learning_rate": 5.6012192029042354e-06, - "loss": 1.1004, - "step": 2574 - }, - { - "epoch": 1.9489120151371808, - "grad_norm": 2.2864949703216553, - "learning_rate": 5.594011761221554e-06, - "loss": 1.1298, - "step": 2575 - }, - { - "epoch": 1.9496688741721855, - "grad_norm": 2.291849374771118, - "learning_rate": 5.5868071602162875e-06, - "loss": 1.1472, - "step": 2576 - }, - { - "epoch": 1.9504257332071901, - "grad_norm": 2.223792552947998, - "learning_rate": 5.579605404532403e-06, - "loss": 1.1307, - "step": 2577 - }, - { - "epoch": 1.951182592242195, - "grad_norm": 2.0151219367980957, - "learning_rate": 5.572406498812049e-06, - "loss": 1.1281, - "step": 2578 - }, - { - "epoch": 1.9519394512771995, - "grad_norm": 2.023299217224121, - "learning_rate": 5.565210447695529e-06, - "loss": 1.165, - "step": 2579 - }, - { - "epoch": 1.9526963103122044, - "grad_norm": 1.902061939239502, - "learning_rate": 5.5580172558213064e-06, - "loss": 1.061, - "step": 2580 - }, - { - "epoch": 1.953453169347209, - "grad_norm": 2.015148639678955, - "learning_rate": 5.550826927826003e-06, - "loss": 1.0866, - "step": 2581 - }, - { - "epoch": 1.9542100283822137, - "grad_norm": 1.9616479873657227, - "learning_rate": 5.5436394683443996e-06, - "loss": 1.0903, - "step": 2582 - }, - { - "epoch": 1.9549668874172186, - "grad_norm": 1.9738472700119019, - "learning_rate": 5.536454882009412e-06, - "loss": 1.1312, - "step": 2583 - }, - { - "epoch": 1.9557237464522232, - "grad_norm": 2.2209506034851074, - "learning_rate": 5.52927317345213e-06, - "loss": 1.0352, - "step": 2584 - }, - { - "epoch": 1.956480605487228, - "grad_norm": 2.2492170333862305, - "learning_rate": 5.522094347301757e-06, - "loss": 1.0982, - "step": 2585 - }, - { - "epoch": 1.9572374645222328, - "grad_norm": 1.9593442678451538, - "learning_rate": 
5.514918408185666e-06, - "loss": 1.1162, - "step": 2586 - }, - { - "epoch": 1.9579943235572375, - "grad_norm": 2.0279417037963867, - "learning_rate": 5.507745360729356e-06, - "loss": 1.1148, - "step": 2587 - }, - { - "epoch": 1.9587511825922421, - "grad_norm": 2.1683051586151123, - "learning_rate": 5.500575209556462e-06, - "loss": 1.1078, - "step": 2588 - }, - { - "epoch": 1.959508041627247, - "grad_norm": 2.0510294437408447, - "learning_rate": 5.493407959288752e-06, - "loss": 1.1099, - "step": 2589 - }, - { - "epoch": 1.9602649006622517, - "grad_norm": 2.144102096557617, - "learning_rate": 5.486243614546135e-06, - "loss": 1.0938, - "step": 2590 - }, - { - "epoch": 1.9610217596972563, - "grad_norm": 1.9423801898956299, - "learning_rate": 5.479082179946628e-06, - "loss": 1.0941, - "step": 2591 - }, - { - "epoch": 1.9617786187322612, - "grad_norm": 1.9970104694366455, - "learning_rate": 5.471923660106387e-06, - "loss": 1.1106, - "step": 2592 - }, - { - "epoch": 1.962535477767266, - "grad_norm": 2.0680718421936035, - "learning_rate": 5.46476805963969e-06, - "loss": 1.1039, - "step": 2593 - }, - { - "epoch": 1.9632923368022706, - "grad_norm": 2.138693332672119, - "learning_rate": 5.457615383158917e-06, - "loss": 1.1283, - "step": 2594 - }, - { - "epoch": 1.9640491958372754, - "grad_norm": 2.0552265644073486, - "learning_rate": 5.450465635274581e-06, - "loss": 1.0889, - "step": 2595 - }, - { - "epoch": 1.9648060548722799, - "grad_norm": 1.974301815032959, - "learning_rate": 5.443318820595303e-06, - "loss": 1.1174, - "step": 2596 - }, - { - "epoch": 1.9655629139072848, - "grad_norm": 1.9770950078964233, - "learning_rate": 5.436174943727803e-06, - "loss": 1.1261, - "step": 2597 - }, - { - "epoch": 1.9663197729422897, - "grad_norm": 2.0605309009552, - "learning_rate": 5.42903400927692e-06, - "loss": 1.1453, - "step": 2598 - }, - { - "epoch": 1.967076631977294, - "grad_norm": 1.9402192831039429, - "learning_rate": 5.421896021845591e-06, - "loss": 1.1047, - "step": 2599 - }, - { - "epoch": 1.967833491012299, - "grad_norm": 2.0428860187530518, - "learning_rate": 5.4147609860348545e-06, - "loss": 1.0957, - "step": 2600 - }, - { - "epoch": 1.9685903500473036, - "grad_norm": 1.9550975561141968, - "learning_rate": 5.407628906443844e-06, - "loss": 1.1202, - "step": 2601 - }, - { - "epoch": 1.9693472090823083, - "grad_norm": 1.9513860940933228, - "learning_rate": 5.400499787669788e-06, - "loss": 1.0562, - "step": 2602 - }, - { - "epoch": 1.9701040681173132, - "grad_norm": 2.2109532356262207, - "learning_rate": 5.393373634308015e-06, - "loss": 1.1637, - "step": 2603 - }, - { - "epoch": 1.9708609271523179, - "grad_norm": 1.9870307445526123, - "learning_rate": 5.3862504509519245e-06, - "loss": 1.1243, - "step": 2604 - }, - { - "epoch": 1.9716177861873225, - "grad_norm": 2.027862310409546, - "learning_rate": 5.379130242193018e-06, - "loss": 1.0791, - "step": 2605 - }, - { - "epoch": 1.9723746452223274, - "grad_norm": 1.969875454902649, - "learning_rate": 5.372013012620875e-06, - "loss": 1.1593, - "step": 2606 - }, - { - "epoch": 1.973131504257332, - "grad_norm": 2.163132667541504, - "learning_rate": 5.3648987668231475e-06, - "loss": 1.1337, - "step": 2607 - }, - { - "epoch": 1.9738883632923367, - "grad_norm": 2.0548205375671387, - "learning_rate": 5.357787509385571e-06, - "loss": 1.1185, - "step": 2608 - }, - { - "epoch": 1.9746452223273416, - "grad_norm": 2.120103597640991, - "learning_rate": 5.350679244891962e-06, - "loss": 1.1419, - "step": 2609 - }, - { - "epoch": 1.9754020813623463, - "grad_norm": 
2.1263537406921387, - "learning_rate": 5.343573977924188e-06, - "loss": 1.1043, - "step": 2610 - }, - { - "epoch": 1.976158940397351, - "grad_norm": 2.0377280712127686, - "learning_rate": 5.3364717130622e-06, - "loss": 1.0852, - "step": 2611 - }, - { - "epoch": 1.9769157994323558, - "grad_norm": 1.9558144807815552, - "learning_rate": 5.329372454884014e-06, - "loss": 1.1432, - "step": 2612 - }, - { - "epoch": 1.9776726584673603, - "grad_norm": 2.1129062175750732, - "learning_rate": 5.322276207965698e-06, - "loss": 1.1347, - "step": 2613 - }, - { - "epoch": 1.9784295175023652, - "grad_norm": 2.042936086654663, - "learning_rate": 5.315182976881382e-06, - "loss": 1.1201, - "step": 2614 - }, - { - "epoch": 1.97918637653737, - "grad_norm": 2.1350150108337402, - "learning_rate": 5.308092766203265e-06, - "loss": 1.0633, - "step": 2615 - }, - { - "epoch": 1.9799432355723745, - "grad_norm": 1.984386682510376, - "learning_rate": 5.301005580501579e-06, - "loss": 1.1045, - "step": 2616 - }, - { - "epoch": 1.9807000946073794, - "grad_norm": 1.9831804037094116, - "learning_rate": 5.293921424344624e-06, - "loss": 1.1376, - "step": 2617 - }, - { - "epoch": 1.981456953642384, - "grad_norm": 2.1934800148010254, - "learning_rate": 5.286840302298729e-06, - "loss": 1.1043, - "step": 2618 - }, - { - "epoch": 1.9822138126773887, - "grad_norm": 2.0572476387023926, - "learning_rate": 5.2797622189282835e-06, - "loss": 1.0807, - "step": 2619 - }, - { - "epoch": 1.9829706717123936, - "grad_norm": 2.0445570945739746, - "learning_rate": 5.272687178795715e-06, - "loss": 1.1075, - "step": 2620 - }, - { - "epoch": 1.9837275307473983, - "grad_norm": 2.0021796226501465, - "learning_rate": 5.265615186461479e-06, - "loss": 1.0971, - "step": 2621 - }, - { - "epoch": 1.984484389782403, - "grad_norm": 2.107959508895874, - "learning_rate": 5.258546246484077e-06, - "loss": 1.1329, - "step": 2622 - }, - { - "epoch": 1.9852412488174078, - "grad_norm": 1.8930269479751587, - "learning_rate": 5.251480363420041e-06, - "loss": 1.0927, - "step": 2623 - }, - { - "epoch": 1.9859981078524125, - "grad_norm": 2.0325968265533447, - "learning_rate": 5.244417541823935e-06, - "loss": 1.1009, - "step": 2624 - }, - { - "epoch": 1.9867549668874172, - "grad_norm": 2.3380300998687744, - "learning_rate": 5.237357786248337e-06, - "loss": 1.0821, - "step": 2625 - }, - { - "epoch": 1.987511825922422, - "grad_norm": 2.121659517288208, - "learning_rate": 5.230301101243864e-06, - "loss": 1.0595, - "step": 2626 - }, - { - "epoch": 1.9882686849574267, - "grad_norm": 2.329930067062378, - "learning_rate": 5.22324749135915e-06, - "loss": 1.1233, - "step": 2627 - }, - { - "epoch": 1.9890255439924314, - "grad_norm": 2.044088840484619, - "learning_rate": 5.216196961140837e-06, - "loss": 1.1064, - "step": 2628 - }, - { - "epoch": 1.9897824030274363, - "grad_norm": 2.201205015182495, - "learning_rate": 5.209149515133593e-06, - "loss": 1.1553, - "step": 2629 - }, - { - "epoch": 1.990539262062441, - "grad_norm": 2.029348850250244, - "learning_rate": 5.202105157880095e-06, - "loss": 1.119, - "step": 2630 - }, - { - "epoch": 1.9912961210974456, - "grad_norm": 2.332625150680542, - "learning_rate": 5.1950638939210296e-06, - "loss": 1.0767, - "step": 2631 - }, - { - "epoch": 1.9920529801324505, - "grad_norm": 2.2954776287078857, - "learning_rate": 5.188025727795084e-06, - "loss": 1.0942, - "step": 2632 - }, - { - "epoch": 1.992809839167455, - "grad_norm": 1.9560728073120117, - "learning_rate": 5.180990664038954e-06, - "loss": 1.0801, - "step": 2633 - }, - { - "epoch": 
1.9935666982024598, - "grad_norm": 2.1281726360321045, - "learning_rate": 5.17395870718734e-06, - "loss": 1.1274, - "step": 2634 - }, - { - "epoch": 1.9943235572374647, - "grad_norm": 1.8601038455963135, - "learning_rate": 5.166929861772925e-06, - "loss": 1.0999, - "step": 2635 - }, - { - "epoch": 1.9950804162724691, - "grad_norm": 2.056415319442749, - "learning_rate": 5.159904132326399e-06, - "loss": 1.0721, - "step": 2636 - }, - { - "epoch": 1.995837275307474, - "grad_norm": 1.9574776887893677, - "learning_rate": 5.152881523376445e-06, - "loss": 1.1364, - "step": 2637 - }, - { - "epoch": 1.9965941343424787, - "grad_norm": 2.011434555053711, - "learning_rate": 5.145862039449723e-06, - "loss": 1.1044, - "step": 2638 - }, - { - "epoch": 1.9973509933774833, - "grad_norm": 1.9725828170776367, - "learning_rate": 5.138845685070891e-06, - "loss": 1.1425, - "step": 2639 - }, - { - "epoch": 1.9981078524124882, - "grad_norm": 2.337836742401123, - "learning_rate": 5.131832464762576e-06, - "loss": 1.1062, - "step": 2640 - }, - { - "epoch": 1.998864711447493, - "grad_norm": 2.0006585121154785, - "learning_rate": 5.1248223830454e-06, - "loss": 1.1117, - "step": 2641 - }, - { - "epoch": 1.9996215704824976, - "grad_norm": 2.063220977783203, - "learning_rate": 5.117815444437956e-06, - "loss": 1.0999, - "step": 2642 - }, - { - "epoch": 2.0003784295175024, - "grad_norm": 2.052854537963867, - "learning_rate": 5.110811653456801e-06, - "loss": 1.099, - "step": 2643 - }, - { - "epoch": 2.001135288552507, - "grad_norm": 2.046462059020996, - "learning_rate": 5.103811014616479e-06, - "loss": 1.0824, - "step": 2644 - }, - { - "epoch": 2.0018921475875118, - "grad_norm": 2.0290746688842773, - "learning_rate": 5.096813532429496e-06, - "loss": 1.0797, - "step": 2645 - }, - { - "epoch": 2.0026490066225167, - "grad_norm": 2.0245625972747803, - "learning_rate": 5.089819211406316e-06, - "loss": 1.078, - "step": 2646 - }, - { - "epoch": 2.003405865657521, - "grad_norm": 2.207991600036621, - "learning_rate": 5.082828056055375e-06, - "loss": 1.0669, - "step": 2647 - }, - { - "epoch": 2.004162724692526, - "grad_norm": 2.133127212524414, - "learning_rate": 5.075840070883069e-06, - "loss": 1.0906, - "step": 2648 - }, - { - "epoch": 2.004919583727531, - "grad_norm": 2.1217539310455322, - "learning_rate": 5.068855260393739e-06, - "loss": 1.1084, - "step": 2649 - }, - { - "epoch": 2.0056764427625353, - "grad_norm": 2.1101694107055664, - "learning_rate": 5.061873629089693e-06, - "loss": 1.0727, - "step": 2650 - }, - { - "epoch": 2.00643330179754, - "grad_norm": 2.0910747051239014, - "learning_rate": 5.054895181471185e-06, - "loss": 1.0533, - "step": 2651 - }, - { - "epoch": 2.007190160832545, - "grad_norm": 1.8526837825775146, - "learning_rate": 5.0479199220364085e-06, - "loss": 1.0245, - "step": 2652 - }, - { - "epoch": 2.0079470198675495, - "grad_norm": 2.0023531913757324, - "learning_rate": 5.040947855281515e-06, - "loss": 1.096, - "step": 2653 - }, - { - "epoch": 2.0087038789025544, - "grad_norm": 1.9686319828033447, - "learning_rate": 5.033978985700592e-06, - "loss": 1.0526, - "step": 2654 - }, - { - "epoch": 2.0094607379375593, - "grad_norm": 1.999506950378418, - "learning_rate": 5.02701331778567e-06, - "loss": 1.0875, - "step": 2655 - }, - { - "epoch": 2.0102175969725637, - "grad_norm": 2.096315860748291, - "learning_rate": 5.020050856026703e-06, - "loss": 1.0637, - "step": 2656 - }, - { - "epoch": 2.0109744560075686, - "grad_norm": 2.0473968982696533, - "learning_rate": 5.013091604911594e-06, - "loss": 1.1281, - "step": 2657 
- }, - { - "epoch": 2.0117313150425735, - "grad_norm": 2.0134975910186768, - "learning_rate": 5.006135568926175e-06, - "loss": 1.0641, - "step": 2658 - }, - { - "epoch": 2.012488174077578, - "grad_norm": 1.9990431070327759, - "learning_rate": 4.999182752554189e-06, - "loss": 1.1031, - "step": 2659 - }, - { - "epoch": 2.013245033112583, - "grad_norm": 2.199671983718872, - "learning_rate": 4.992233160277321e-06, - "loss": 1.1043, - "step": 2660 - }, - { - "epoch": 2.0140018921475873, - "grad_norm": 2.0332555770874023, - "learning_rate": 4.985286796575174e-06, - "loss": 1.1057, - "step": 2661 - }, - { - "epoch": 2.014758751182592, - "grad_norm": 2.1199121475219727, - "learning_rate": 4.978343665925269e-06, - "loss": 1.036, - "step": 2662 - }, - { - "epoch": 2.015515610217597, - "grad_norm": 2.047947883605957, - "learning_rate": 4.9714037728030415e-06, - "loss": 1.0934, - "step": 2663 - }, - { - "epoch": 2.0162724692526015, - "grad_norm": 1.814427137374878, - "learning_rate": 4.964467121681834e-06, - "loss": 1.0809, - "step": 2664 - }, - { - "epoch": 2.0170293282876064, - "grad_norm": 2.189452648162842, - "learning_rate": 4.957533717032911e-06, - "loss": 1.0565, - "step": 2665 - }, - { - "epoch": 2.0177861873226113, - "grad_norm": 2.025991201400757, - "learning_rate": 4.95060356332544e-06, - "loss": 1.0633, - "step": 2666 - }, - { - "epoch": 2.0185430463576157, - "grad_norm": 2.3097431659698486, - "learning_rate": 4.943676665026492e-06, - "loss": 1.0527, - "step": 2667 - }, - { - "epoch": 2.0192999053926206, - "grad_norm": 2.235900402069092, - "learning_rate": 4.936753026601047e-06, - "loss": 1.0878, - "step": 2668 - }, - { - "epoch": 2.0200567644276255, - "grad_norm": 2.0237877368927, - "learning_rate": 4.929832652511963e-06, - "loss": 1.0243, - "step": 2669 - }, - { - "epoch": 2.02081362346263, - "grad_norm": 2.148148536682129, - "learning_rate": 4.922915547220014e-06, - "loss": 1.05, - "step": 2670 - }, - { - "epoch": 2.021570482497635, - "grad_norm": 2.204345464706421, - "learning_rate": 4.91600171518386e-06, - "loss": 1.0613, - "step": 2671 - }, - { - "epoch": 2.0223273415326397, - "grad_norm": 2.05426287651062, - "learning_rate": 4.909091160860053e-06, - "loss": 1.0683, - "step": 2672 - }, - { - "epoch": 2.023084200567644, - "grad_norm": 2.0507991313934326, - "learning_rate": 4.902183888703029e-06, - "loss": 1.1039, - "step": 2673 - }, - { - "epoch": 2.023841059602649, - "grad_norm": 2.111011505126953, - "learning_rate": 4.895279903165118e-06, - "loss": 1.0708, - "step": 2674 - }, - { - "epoch": 2.024597918637654, - "grad_norm": 2.152397871017456, - "learning_rate": 4.888379208696516e-06, - "loss": 1.135, - "step": 2675 - }, - { - "epoch": 2.0253547776726584, - "grad_norm": 2.062863826751709, - "learning_rate": 4.881481809745303e-06, - "loss": 1.0808, - "step": 2676 - }, - { - "epoch": 2.0261116367076633, - "grad_norm": 2.052548885345459, - "learning_rate": 4.874587710757442e-06, - "loss": 1.0729, - "step": 2677 - }, - { - "epoch": 2.026868495742668, - "grad_norm": 2.1231849193573, - "learning_rate": 4.8676969161767625e-06, - "loss": 1.13, - "step": 2678 - }, - { - "epoch": 2.0276253547776726, - "grad_norm": 1.9931443929672241, - "learning_rate": 4.860809430444969e-06, - "loss": 1.0863, - "step": 2679 - }, - { - "epoch": 2.0283822138126775, - "grad_norm": 1.980806589126587, - "learning_rate": 4.853925258001626e-06, - "loss": 1.1208, - "step": 2680 - }, - { - "epoch": 2.029139072847682, - "grad_norm": 2.0515875816345215, - "learning_rate": 4.847044403284166e-06, - "loss": 1.0301, - 
"step": 2681 - }, - { - "epoch": 2.029895931882687, - "grad_norm": 2.115715742111206, - "learning_rate": 4.840166870727887e-06, - "loss": 1.0677, - "step": 2682 - }, - { - "epoch": 2.0306527909176917, - "grad_norm": 1.9753094911575317, - "learning_rate": 4.833292664765935e-06, - "loss": 1.0814, - "step": 2683 - }, - { - "epoch": 2.031409649952696, - "grad_norm": 2.0974655151367188, - "learning_rate": 4.8264217898293226e-06, - "loss": 1.0553, - "step": 2684 - }, - { - "epoch": 2.032166508987701, - "grad_norm": 1.9045485258102417, - "learning_rate": 4.8195542503469145e-06, - "loss": 1.1018, - "step": 2685 - }, - { - "epoch": 2.032923368022706, - "grad_norm": 2.1343581676483154, - "learning_rate": 4.812690050745413e-06, - "loss": 1.1279, - "step": 2686 - }, - { - "epoch": 2.0336802270577103, - "grad_norm": 2.060368776321411, - "learning_rate": 4.805829195449382e-06, - "loss": 1.1021, - "step": 2687 - }, - { - "epoch": 2.0344370860927152, - "grad_norm": 2.2184059619903564, - "learning_rate": 4.798971688881224e-06, - "loss": 1.0819, - "step": 2688 - }, - { - "epoch": 2.03519394512772, - "grad_norm": 2.1077048778533936, - "learning_rate": 4.792117535461187e-06, - "loss": 1.0469, - "step": 2689 - }, - { - "epoch": 2.0359508041627246, - "grad_norm": 2.105867624282837, - "learning_rate": 4.7852667396073475e-06, - "loss": 1.1129, - "step": 2690 - }, - { - "epoch": 2.0367076631977294, - "grad_norm": 1.9376499652862549, - "learning_rate": 4.7784193057356234e-06, - "loss": 1.0889, - "step": 2691 - }, - { - "epoch": 2.0374645222327343, - "grad_norm": 2.1429734230041504, - "learning_rate": 4.771575238259769e-06, - "loss": 1.06, - "step": 2692 - }, - { - "epoch": 2.0382213812677388, - "grad_norm": 2.1066880226135254, - "learning_rate": 4.764734541591365e-06, - "loss": 1.0841, - "step": 2693 - }, - { - "epoch": 2.0389782403027437, - "grad_norm": 2.034998655319214, - "learning_rate": 4.757897220139822e-06, - "loss": 1.1155, - "step": 2694 - }, - { - "epoch": 2.0397350993377485, - "grad_norm": 2.0084969997406006, - "learning_rate": 4.751063278312371e-06, - "loss": 1.0365, - "step": 2695 - }, - { - "epoch": 2.040491958372753, - "grad_norm": 2.260364055633545, - "learning_rate": 4.744232720514074e-06, - "loss": 1.0722, - "step": 2696 - }, - { - "epoch": 2.041248817407758, - "grad_norm": 2.0515432357788086, - "learning_rate": 4.737405551147791e-06, - "loss": 1.1263, - "step": 2697 - }, - { - "epoch": 2.0420056764427623, - "grad_norm": 2.0565128326416016, - "learning_rate": 4.7305817746142186e-06, - "loss": 1.0697, - "step": 2698 - }, - { - "epoch": 2.042762535477767, - "grad_norm": 2.287740707397461, - "learning_rate": 4.723761395311858e-06, - "loss": 1.0616, - "step": 2699 - }, - { - "epoch": 2.043519394512772, - "grad_norm": 2.0816521644592285, - "learning_rate": 4.716944417637024e-06, - "loss": 1.0425, - "step": 2700 - }, - { - "epoch": 2.0442762535477765, - "grad_norm": 2.117865562438965, - "learning_rate": 4.710130845983837e-06, - "loss": 1.1141, - "step": 2701 - }, - { - "epoch": 2.0450331125827814, - "grad_norm": 1.9112534523010254, - "learning_rate": 4.703320684744216e-06, - "loss": 1.06, - "step": 2702 - }, - { - "epoch": 2.0457899716177863, - "grad_norm": 2.1456851959228516, - "learning_rate": 4.696513938307894e-06, - "loss": 1.0512, - "step": 2703 - }, - { - "epoch": 2.0465468306527907, - "grad_norm": 2.6872496604919434, - "learning_rate": 4.689710611062389e-06, - "loss": 1.0907, - "step": 2704 - }, - { - "epoch": 2.0473036896877956, - "grad_norm": 2.116586446762085, - "learning_rate": 
4.682910707393024e-06, - "loss": 1.1179, - "step": 2705 - }, - { - "epoch": 2.0480605487228005, - "grad_norm": 2.1400527954101562, - "learning_rate": 4.676114231682915e-06, - "loss": 1.0673, - "step": 2706 - }, - { - "epoch": 2.048817407757805, - "grad_norm": 2.1281938552856445, - "learning_rate": 4.669321188312969e-06, - "loss": 1.0719, - "step": 2707 - }, - { - "epoch": 2.04957426679281, - "grad_norm": 2.1432082653045654, - "learning_rate": 4.662531581661873e-06, - "loss": 1.0844, - "step": 2708 - }, - { - "epoch": 2.0503311258278147, - "grad_norm": 1.9638357162475586, - "learning_rate": 4.655745416106105e-06, - "loss": 1.0379, - "step": 2709 - }, - { - "epoch": 2.051087984862819, - "grad_norm": 2.069023609161377, - "learning_rate": 4.648962696019928e-06, - "loss": 1.0808, - "step": 2710 - }, - { - "epoch": 2.051844843897824, - "grad_norm": 1.974176287651062, - "learning_rate": 4.6421834257753745e-06, - "loss": 1.1431, - "step": 2711 - }, - { - "epoch": 2.052601702932829, - "grad_norm": 2.0162038803100586, - "learning_rate": 4.635407609742265e-06, - "loss": 1.0715, - "step": 2712 - }, - { - "epoch": 2.0533585619678334, - "grad_norm": 2.0583693981170654, - "learning_rate": 4.628635252288178e-06, - "loss": 1.0583, - "step": 2713 - }, - { - "epoch": 2.0541154210028383, - "grad_norm": 1.934477686882019, - "learning_rate": 4.621866357778479e-06, - "loss": 1.071, - "step": 2714 - }, - { - "epoch": 2.054872280037843, - "grad_norm": 2.2252588272094727, - "learning_rate": 4.61510093057629e-06, - "loss": 1.1205, - "step": 2715 - }, - { - "epoch": 2.0556291390728476, - "grad_norm": 2.0399527549743652, - "learning_rate": 4.608338975042509e-06, - "loss": 1.1, - "step": 2716 - }, - { - "epoch": 2.0563859981078525, - "grad_norm": 2.1194961071014404, - "learning_rate": 4.601580495535781e-06, - "loss": 1.0566, - "step": 2717 - }, - { - "epoch": 2.057142857142857, - "grad_norm": 2.1798765659332275, - "learning_rate": 4.594825496412527e-06, - "loss": 1.07, - "step": 2718 - }, - { - "epoch": 2.057899716177862, - "grad_norm": 2.00516414642334, - "learning_rate": 4.588073982026908e-06, - "loss": 1.0404, - "step": 2719 - }, - { - "epoch": 2.0586565752128667, - "grad_norm": 2.1549298763275146, - "learning_rate": 4.581325956730851e-06, - "loss": 1.0873, - "step": 2720 - }, - { - "epoch": 2.059413434247871, - "grad_norm": 2.3754074573516846, - "learning_rate": 4.574581424874031e-06, - "loss": 1.0917, - "step": 2721 - }, - { - "epoch": 2.060170293282876, - "grad_norm": 2.118363857269287, - "learning_rate": 4.56784039080387e-06, - "loss": 1.0864, - "step": 2722 - }, - { - "epoch": 2.060927152317881, - "grad_norm": 1.9879770278930664, - "learning_rate": 4.561102858865542e-06, - "loss": 1.047, - "step": 2723 - }, - { - "epoch": 2.0616840113528854, - "grad_norm": 2.0962250232696533, - "learning_rate": 4.554368833401944e-06, - "loss": 1.0803, - "step": 2724 - }, - { - "epoch": 2.0624408703878903, - "grad_norm": 2.095574378967285, - "learning_rate": 4.547638318753733e-06, - "loss": 1.1101, - "step": 2725 - }, - { - "epoch": 2.063197729422895, - "grad_norm": 2.2542734146118164, - "learning_rate": 4.540911319259297e-06, - "loss": 1.0672, - "step": 2726 - }, - { - "epoch": 2.0639545884578996, - "grad_norm": 2.1071441173553467, - "learning_rate": 4.534187839254755e-06, - "loss": 1.0295, - "step": 2727 - }, - { - "epoch": 2.0647114474929045, - "grad_norm": 2.2289743423461914, - "learning_rate": 4.527467883073962e-06, - "loss": 1.0759, - "step": 2728 - }, - { - "epoch": 2.0654683065279094, - "grad_norm": 2.20210862159729, - 
"learning_rate": 4.520751455048502e-06, - "loss": 1.0952, - "step": 2729 - }, - { - "epoch": 2.066225165562914, - "grad_norm": 2.0890111923217773, - "learning_rate": 4.5140385595076795e-06, - "loss": 1.1055, - "step": 2730 - }, - { - "epoch": 2.0669820245979187, - "grad_norm": 2.0093884468078613, - "learning_rate": 4.507329200778518e-06, - "loss": 1.0836, - "step": 2731 - }, - { - "epoch": 2.0677388836329236, - "grad_norm": 2.171649932861328, - "learning_rate": 4.500623383185774e-06, - "loss": 1.0794, - "step": 2732 - }, - { - "epoch": 2.068495742667928, - "grad_norm": 1.8796758651733398, - "learning_rate": 4.493921111051916e-06, - "loss": 1.084, - "step": 2733 - }, - { - "epoch": 2.069252601702933, - "grad_norm": 2.1491153240203857, - "learning_rate": 4.487222388697128e-06, - "loss": 1.0629, - "step": 2734 - }, - { - "epoch": 2.0700094607379373, - "grad_norm": 2.3227274417877197, - "learning_rate": 4.4805272204392965e-06, - "loss": 1.0901, - "step": 2735 - }, - { - "epoch": 2.0707663197729422, - "grad_norm": 2.256610631942749, - "learning_rate": 4.47383561059403e-06, - "loss": 1.1046, - "step": 2736 - }, - { - "epoch": 2.071523178807947, - "grad_norm": 1.9754210710525513, - "learning_rate": 4.467147563474642e-06, - "loss": 1.0433, - "step": 2737 - }, - { - "epoch": 2.0722800378429516, - "grad_norm": 2.2307772636413574, - "learning_rate": 4.460463083392139e-06, - "loss": 1.079, - "step": 2738 - }, - { - "epoch": 2.0730368968779564, - "grad_norm": 1.8400083780288696, - "learning_rate": 4.453782174655236e-06, - "loss": 1.0615, - "step": 2739 - }, - { - "epoch": 2.0737937559129613, - "grad_norm": 1.9424253702163696, - "learning_rate": 4.447104841570351e-06, - "loss": 1.0318, - "step": 2740 - }, - { - "epoch": 2.0745506149479658, - "grad_norm": 2.008769989013672, - "learning_rate": 4.440431088441582e-06, - "loss": 1.0861, - "step": 2741 - }, - { - "epoch": 2.0753074739829707, - "grad_norm": 2.024463415145874, - "learning_rate": 4.4337609195707325e-06, - "loss": 1.0406, - "step": 2742 - }, - { - "epoch": 2.0760643330179755, - "grad_norm": 2.0557620525360107, - "learning_rate": 4.4270943392572924e-06, - "loss": 1.0983, - "step": 2743 - }, - { - "epoch": 2.07682119205298, - "grad_norm": 2.7445693016052246, - "learning_rate": 4.420431351798441e-06, - "loss": 1.1253, - "step": 2744 - }, - { - "epoch": 2.077578051087985, - "grad_norm": 2.1181790828704834, - "learning_rate": 4.413771961489035e-06, - "loss": 1.0808, - "step": 2745 - }, - { - "epoch": 2.0783349101229898, - "grad_norm": 2.035220146179199, - "learning_rate": 4.4071161726216116e-06, - "loss": 1.0242, - "step": 2746 - }, - { - "epoch": 2.079091769157994, - "grad_norm": 2.0690808296203613, - "learning_rate": 4.4004639894863945e-06, - "loss": 1.072, - "step": 2747 - }, - { - "epoch": 2.079848628192999, - "grad_norm": 2.145026683807373, - "learning_rate": 4.39381541637128e-06, - "loss": 1.0455, - "step": 2748 - }, - { - "epoch": 2.080605487228004, - "grad_norm": 1.9162312746047974, - "learning_rate": 4.387170457561837e-06, - "loss": 1.073, - "step": 2749 - }, - { - "epoch": 2.0813623462630084, - "grad_norm": 2.1280105113983154, - "learning_rate": 4.380529117341305e-06, - "loss": 1.151, - "step": 2750 - }, - { - "epoch": 2.0821192052980133, - "grad_norm": 2.1977860927581787, - "learning_rate": 4.373891399990595e-06, - "loss": 1.0732, - "step": 2751 - }, - { - "epoch": 2.0828760643330178, - "grad_norm": 2.101363182067871, - "learning_rate": 4.367257309788268e-06, - "loss": 1.0534, - "step": 2752 - }, - { - "epoch": 2.0836329233680226, - 
"grad_norm": 2.0512733459472656, - "learning_rate": 4.360626851010562e-06, - "loss": 1.0613, - "step": 2753 - }, - { - "epoch": 2.0843897824030275, - "grad_norm": 2.4588279724121094, - "learning_rate": 4.35400002793137e-06, - "loss": 1.058, - "step": 2754 - }, - { - "epoch": 2.085146641438032, - "grad_norm": 2.0685465335845947, - "learning_rate": 4.347376844822242e-06, - "loss": 1.0532, - "step": 2755 - }, - { - "epoch": 2.085903500473037, - "grad_norm": 2.1436235904693604, - "learning_rate": 4.340757305952384e-06, - "loss": 1.0618, - "step": 2756 - }, - { - "epoch": 2.0866603595080417, - "grad_norm": 2.1337039470672607, - "learning_rate": 4.334141415588644e-06, - "loss": 1.0852, - "step": 2757 - }, - { - "epoch": 2.087417218543046, - "grad_norm": 1.9831031560897827, - "learning_rate": 4.3275291779955245e-06, - "loss": 1.1146, - "step": 2758 - }, - { - "epoch": 2.088174077578051, - "grad_norm": 2.2047150135040283, - "learning_rate": 4.320920597435174e-06, - "loss": 1.0817, - "step": 2759 - }, - { - "epoch": 2.088930936613056, - "grad_norm": 2.065563201904297, - "learning_rate": 4.3143156781673846e-06, - "loss": 1.1424, - "step": 2760 - }, - { - "epoch": 2.0896877956480604, - "grad_norm": 2.1660046577453613, - "learning_rate": 4.307714424449583e-06, - "loss": 1.1019, - "step": 2761 - }, - { - "epoch": 2.0904446546830653, - "grad_norm": 2.0400032997131348, - "learning_rate": 4.301116840536844e-06, - "loss": 1.0345, - "step": 2762 - }, - { - "epoch": 2.09120151371807, - "grad_norm": 1.9771476984024048, - "learning_rate": 4.29452293068186e-06, - "loss": 1.0576, - "step": 2763 - }, - { - "epoch": 2.0919583727530746, - "grad_norm": 2.2237024307250977, - "learning_rate": 4.287932699134973e-06, - "loss": 1.0808, - "step": 2764 - }, - { - "epoch": 2.0927152317880795, - "grad_norm": 2.0638787746429443, - "learning_rate": 4.281346150144139e-06, - "loss": 1.0315, - "step": 2765 - }, - { - "epoch": 2.0934720908230844, - "grad_norm": 2.370335817337036, - "learning_rate": 4.27476328795495e-06, - "loss": 1.1243, - "step": 2766 - }, - { - "epoch": 2.094228949858089, - "grad_norm": 2.069380521774292, - "learning_rate": 4.268184116810623e-06, - "loss": 1.0801, - "step": 2767 - }, - { - "epoch": 2.0949858088930937, - "grad_norm": 2.310030221939087, - "learning_rate": 4.261608640951981e-06, - "loss": 1.0666, - "step": 2768 - }, - { - "epoch": 2.0957426679280986, - "grad_norm": 2.0185890197753906, - "learning_rate": 4.255036864617483e-06, - "loss": 1.0817, - "step": 2769 - }, - { - "epoch": 2.096499526963103, - "grad_norm": 2.0323379039764404, - "learning_rate": 4.248468792043194e-06, - "loss": 1.057, - "step": 2770 - }, - { - "epoch": 2.097256385998108, - "grad_norm": 2.362914562225342, - "learning_rate": 4.241904427462797e-06, - "loss": 1.0846, - "step": 2771 - }, - { - "epoch": 2.0980132450331124, - "grad_norm": 2.203740119934082, - "learning_rate": 4.235343775107575e-06, - "loss": 1.0565, - "step": 2772 - }, - { - "epoch": 2.0987701040681173, - "grad_norm": 2.006248712539673, - "learning_rate": 4.22878683920643e-06, - "loss": 1.1263, - "step": 2773 - }, - { - "epoch": 2.099526963103122, - "grad_norm": 2.120445489883423, - "learning_rate": 4.222233623985858e-06, - "loss": 1.0629, - "step": 2774 - }, - { - "epoch": 2.1002838221381266, - "grad_norm": 2.015179395675659, - "learning_rate": 4.2156841336699625e-06, - "loss": 1.0304, - "step": 2775 - }, - { - "epoch": 2.1010406811731315, - "grad_norm": 2.1381468772888184, - "learning_rate": 4.209138372480447e-06, - "loss": 1.0669, - "step": 2776 - }, - { - "epoch": 
2.1017975402081364, - "grad_norm": 2.2807891368865967, - "learning_rate": 4.202596344636609e-06, - "loss": 1.0635, - "step": 2777 - }, - { - "epoch": 2.102554399243141, - "grad_norm": 2.1004843711853027, - "learning_rate": 4.196058054355347e-06, - "loss": 1.1306, - "step": 2778 - }, - { - "epoch": 2.1033112582781457, - "grad_norm": 2.092963695526123, - "learning_rate": 4.189523505851129e-06, - "loss": 1.0561, - "step": 2779 - }, - { - "epoch": 2.1040681173131506, - "grad_norm": 2.0627875328063965, - "learning_rate": 4.1829927033360314e-06, - "loss": 1.0671, - "step": 2780 - }, - { - "epoch": 2.104824976348155, - "grad_norm": 2.0852344036102295, - "learning_rate": 4.17646565101971e-06, - "loss": 1.056, - "step": 2781 - }, - { - "epoch": 2.10558183538316, - "grad_norm": 2.0920495986938477, - "learning_rate": 4.1699423531094065e-06, - "loss": 1.0415, - "step": 2782 - }, - { - "epoch": 2.106338694418165, - "grad_norm": 2.3179705142974854, - "learning_rate": 4.163422813809934e-06, - "loss": 1.0648, - "step": 2783 - }, - { - "epoch": 2.1070955534531692, - "grad_norm": 2.0878725051879883, - "learning_rate": 4.156907037323696e-06, - "loss": 1.0996, - "step": 2784 - }, - { - "epoch": 2.107852412488174, - "grad_norm": 2.1616759300231934, - "learning_rate": 4.1503950278506565e-06, - "loss": 1.0473, - "step": 2785 - }, - { - "epoch": 2.108609271523179, - "grad_norm": 2.314814805984497, - "learning_rate": 4.1438867895883555e-06, - "loss": 1.0717, - "step": 2786 - }, - { - "epoch": 2.1093661305581834, - "grad_norm": 2.105376958847046, - "learning_rate": 4.137382326731906e-06, - "loss": 1.1177, - "step": 2787 - }, - { - "epoch": 2.1101229895931883, - "grad_norm": 2.18996000289917, - "learning_rate": 4.130881643473987e-06, - "loss": 1.0923, - "step": 2788 - }, - { - "epoch": 2.1108798486281932, - "grad_norm": 2.0627288818359375, - "learning_rate": 4.124384744004844e-06, - "loss": 1.0307, - "step": 2789 - }, - { - "epoch": 2.1116367076631977, - "grad_norm": 2.2541861534118652, - "learning_rate": 4.117891632512271e-06, - "loss": 1.0543, - "step": 2790 - }, - { - "epoch": 2.1123935666982026, - "grad_norm": 2.0544228553771973, - "learning_rate": 4.111402313181631e-06, - "loss": 1.0987, - "step": 2791 - }, - { - "epoch": 2.113150425733207, - "grad_norm": 2.1496474742889404, - "learning_rate": 4.1049167901958454e-06, - "loss": 1.0422, - "step": 2792 - }, - { - "epoch": 2.113907284768212, - "grad_norm": 2.1363749504089355, - "learning_rate": 4.098435067735377e-06, - "loss": 1.0371, - "step": 2793 - }, - { - "epoch": 2.1146641438032168, - "grad_norm": 2.166128635406494, - "learning_rate": 4.091957149978247e-06, - "loss": 1.056, - "step": 2794 - }, - { - "epoch": 2.115421002838221, - "grad_norm": 2.3086111545562744, - "learning_rate": 4.085483041100028e-06, - "loss": 1.0582, - "step": 2795 - }, - { - "epoch": 2.116177861873226, - "grad_norm": 2.0368103981018066, - "learning_rate": 4.079012745273822e-06, - "loss": 0.9679, - "step": 2796 - }, - { - "epoch": 2.116934720908231, - "grad_norm": 2.146679639816284, - "learning_rate": 4.072546266670289e-06, - "loss": 1.0472, - "step": 2797 - }, - { - "epoch": 2.1176915799432354, - "grad_norm": 2.188101291656494, - "learning_rate": 4.0660836094576215e-06, - "loss": 1.1283, - "step": 2798 - }, - { - "epoch": 2.1184484389782403, - "grad_norm": 2.099888563156128, - "learning_rate": 4.059624777801554e-06, - "loss": 1.0708, - "step": 2799 - }, - { - "epoch": 2.119205298013245, - "grad_norm": 2.088252544403076, - "learning_rate": 4.053169775865346e-06, - "loss": 1.0619, - "step": 
2800 - }, - { - "epoch": 2.1199621570482496, - "grad_norm": 2.0278518199920654, - "learning_rate": 4.046718607809791e-06, - "loss": 1.0549, - "step": 2801 - }, - { - "epoch": 2.1207190160832545, - "grad_norm": 1.9221056699752808, - "learning_rate": 4.040271277793217e-06, - "loss": 1.0776, - "step": 2802 - }, - { - "epoch": 2.1214758751182594, - "grad_norm": 2.296339511871338, - "learning_rate": 4.033827789971474e-06, - "loss": 1.0686, - "step": 2803 - }, - { - "epoch": 2.122232734153264, - "grad_norm": 2.1365742683410645, - "learning_rate": 4.027388148497936e-06, - "loss": 1.0812, - "step": 2804 - }, - { - "epoch": 2.1229895931882687, - "grad_norm": 1.9683605432510376, - "learning_rate": 4.020952357523498e-06, - "loss": 1.0168, - "step": 2805 - }, - { - "epoch": 2.1237464522232736, - "grad_norm": 2.0199337005615234, - "learning_rate": 4.014520421196579e-06, - "loss": 1.1035, - "step": 2806 - }, - { - "epoch": 2.124503311258278, - "grad_norm": 2.0269358158111572, - "learning_rate": 4.008092343663094e-06, - "loss": 1.0973, - "step": 2807 - }, - { - "epoch": 2.125260170293283, - "grad_norm": 2.286689519882202, - "learning_rate": 4.001668129066491e-06, - "loss": 1.0882, - "step": 2808 - }, - { - "epoch": 2.1260170293282874, - "grad_norm": 2.257807731628418, - "learning_rate": 3.995247781547721e-06, - "loss": 1.0877, - "step": 2809 - }, - { - "epoch": 2.1267738883632923, - "grad_norm": 2.049635171890259, - "learning_rate": 3.98883130524524e-06, - "loss": 1.0924, - "step": 2810 - }, - { - "epoch": 2.127530747398297, - "grad_norm": 2.112349033355713, - "learning_rate": 3.982418704295016e-06, - "loss": 1.0931, - "step": 2811 - }, - { - "epoch": 2.1282876064333016, - "grad_norm": 2.0468220710754395, - "learning_rate": 3.9760099828305104e-06, - "loss": 1.0842, - "step": 2812 - }, - { - "epoch": 2.1290444654683065, - "grad_norm": 2.0540926456451416, - "learning_rate": 3.969605144982682e-06, - "loss": 1.0924, - "step": 2813 - }, - { - "epoch": 2.1298013245033114, - "grad_norm": 2.1668741703033447, - "learning_rate": 3.963204194879998e-06, - "loss": 1.1271, - "step": 2814 - }, - { - "epoch": 2.130558183538316, - "grad_norm": 1.9331365823745728, - "learning_rate": 3.956807136648411e-06, - "loss": 1.1066, - "step": 2815 - }, - { - "epoch": 2.1313150425733207, - "grad_norm": 1.9183405637741089, - "learning_rate": 3.950413974411367e-06, - "loss": 1.1018, - "step": 2816 - }, - { - "epoch": 2.1320719016083256, - "grad_norm": 1.9769048690795898, - "learning_rate": 3.944024712289805e-06, - "loss": 1.0565, - "step": 2817 - }, - { - "epoch": 2.13282876064333, - "grad_norm": 1.961674690246582, - "learning_rate": 3.93763935440214e-06, - "loss": 1.0816, - "step": 2818 - }, - { - "epoch": 2.133585619678335, - "grad_norm": 2.0646157264709473, - "learning_rate": 3.931257904864283e-06, - "loss": 1.0373, - "step": 2819 - }, - { - "epoch": 2.13434247871334, - "grad_norm": 2.243910074234009, - "learning_rate": 3.92488036778961e-06, - "loss": 1.0423, - "step": 2820 - }, - { - "epoch": 2.1350993377483443, - "grad_norm": 2.192121744155884, - "learning_rate": 3.91850674728899e-06, - "loss": 1.0402, - "step": 2821 - }, - { - "epoch": 2.135856196783349, - "grad_norm": 2.0529327392578125, - "learning_rate": 3.912137047470764e-06, - "loss": 1.0676, - "step": 2822 - }, - { - "epoch": 2.136613055818354, - "grad_norm": 2.1558024883270264, - "learning_rate": 3.9057712724407366e-06, - "loss": 1.06, - "step": 2823 - }, - { - "epoch": 2.1373699148533585, - "grad_norm": 1.8848477602005005, - "learning_rate": 3.899409426302193e-06, - 
"loss": 1.0668, - "step": 2824 - }, - { - "epoch": 2.1381267738883634, - "grad_norm": 2.0971271991729736, - "learning_rate": 3.893051513155881e-06, - "loss": 1.1362, - "step": 2825 - }, - { - "epoch": 2.138883632923368, - "grad_norm": 2.3545618057250977, - "learning_rate": 3.88669753710002e-06, - "loss": 1.1014, - "step": 2826 - }, - { - "epoch": 2.1396404919583727, - "grad_norm": 2.0267715454101562, - "learning_rate": 3.880347502230277e-06, - "loss": 1.0955, - "step": 2827 - }, - { - "epoch": 2.1403973509933776, - "grad_norm": 2.065638780593872, - "learning_rate": 3.874001412639796e-06, - "loss": 1.0732, - "step": 2828 - }, - { - "epoch": 2.141154210028382, - "grad_norm": 2.221348285675049, - "learning_rate": 3.867659272419163e-06, - "loss": 1.0891, - "step": 2829 - }, - { - "epoch": 2.141911069063387, - "grad_norm": 2.0352323055267334, - "learning_rate": 3.861321085656425e-06, - "loss": 1.0615, - "step": 2830 - }, - { - "epoch": 2.142667928098392, - "grad_norm": 2.294567584991455, - "learning_rate": 3.854986856437086e-06, - "loss": 1.0886, - "step": 2831 - }, - { - "epoch": 2.1434247871333962, - "grad_norm": 2.132350444793701, - "learning_rate": 3.848656588844089e-06, - "loss": 1.0932, - "step": 2832 - }, - { - "epoch": 2.144181646168401, - "grad_norm": 2.0099170207977295, - "learning_rate": 3.842330286957837e-06, - "loss": 1.1081, - "step": 2833 - }, - { - "epoch": 2.144938505203406, - "grad_norm": 2.181610584259033, - "learning_rate": 3.836007954856154e-06, - "loss": 1.1125, - "step": 2834 - }, - { - "epoch": 2.1456953642384105, - "grad_norm": 2.204340934753418, - "learning_rate": 3.829689596614324e-06, - "loss": 1.1074, - "step": 2835 - }, - { - "epoch": 2.1464522232734153, - "grad_norm": 2.0872256755828857, - "learning_rate": 3.823375216305066e-06, - "loss": 1.0709, - "step": 2836 - }, - { - "epoch": 2.1472090823084202, - "grad_norm": 2.192131757736206, - "learning_rate": 3.8170648179985324e-06, - "loss": 1.0539, - "step": 2837 - }, - { - "epoch": 2.1479659413434247, - "grad_norm": 1.9136378765106201, - "learning_rate": 3.810758405762311e-06, - "loss": 1.0931, - "step": 2838 - }, - { - "epoch": 2.1487228003784296, - "grad_norm": 2.390619993209839, - "learning_rate": 3.8044559836614203e-06, - "loss": 1.0645, - "step": 2839 - }, - { - "epoch": 2.1494796594134344, - "grad_norm": 2.138697862625122, - "learning_rate": 3.798157555758304e-06, - "loss": 1.084, - "step": 2840 - }, - { - "epoch": 2.150236518448439, - "grad_norm": 1.9692342281341553, - "learning_rate": 3.791863126112828e-06, - "loss": 1.0735, - "step": 2841 - }, - { - "epoch": 2.1509933774834438, - "grad_norm": 2.02774715423584, - "learning_rate": 3.78557269878229e-06, - "loss": 1.0853, - "step": 2842 - }, - { - "epoch": 2.151750236518448, - "grad_norm": 1.9480324983596802, - "learning_rate": 3.779286277821402e-06, - "loss": 1.0504, - "step": 2843 - }, - { - "epoch": 2.152507095553453, - "grad_norm": 2.0497875213623047, - "learning_rate": 3.773003867282301e-06, - "loss": 1.0475, - "step": 2844 - }, - { - "epoch": 2.153263954588458, - "grad_norm": 2.0127995014190674, - "learning_rate": 3.766725471214524e-06, - "loss": 1.0773, - "step": 2845 - }, - { - "epoch": 2.1540208136234624, - "grad_norm": 1.920920729637146, - "learning_rate": 3.760451093665034e-06, - "loss": 1.0747, - "step": 2846 - }, - { - "epoch": 2.1547776726584673, - "grad_norm": 1.9610087871551514, - "learning_rate": 3.754180738678201e-06, - "loss": 1.0929, - "step": 2847 - }, - { - "epoch": 2.155534531693472, - "grad_norm": 2.1988742351531982, - "learning_rate": 
3.7479144102957955e-06, - "loss": 1.0426, - "step": 2848 - }, - { - "epoch": 2.1562913907284766, - "grad_norm": 2.0719704627990723, - "learning_rate": 3.7416521125569987e-06, - "loss": 1.0965, - "step": 2849 - }, - { - "epoch": 2.1570482497634815, - "grad_norm": 2.073084592819214, - "learning_rate": 3.7353938494983966e-06, - "loss": 1.0428, - "step": 2850 - }, - { - "epoch": 2.1578051087984864, - "grad_norm": 1.9803792238235474, - "learning_rate": 3.729139625153964e-06, - "loss": 1.0724, - "step": 2851 - }, - { - "epoch": 2.158561967833491, - "grad_norm": 1.9936349391937256, - "learning_rate": 3.72288944355508e-06, - "loss": 1.0278, - "step": 2852 - }, - { - "epoch": 2.1593188268684957, - "grad_norm": 2.1690564155578613, - "learning_rate": 3.7166433087305177e-06, - "loss": 1.044, - "step": 2853 - }, - { - "epoch": 2.1600756859035006, - "grad_norm": 2.125483512878418, - "learning_rate": 3.7104012247064436e-06, - "loss": 1.0493, - "step": 2854 - }, - { - "epoch": 2.160832544938505, - "grad_norm": 2.031766653060913, - "learning_rate": 3.7041631955064067e-06, - "loss": 1.0746, - "step": 2855 - }, - { - "epoch": 2.16158940397351, - "grad_norm": 2.1385655403137207, - "learning_rate": 3.697929225151341e-06, - "loss": 1.0993, - "step": 2856 - }, - { - "epoch": 2.162346263008515, - "grad_norm": 2.363760471343994, - "learning_rate": 3.691699317659574e-06, - "loss": 1.0544, - "step": 2857 - }, - { - "epoch": 2.1631031220435193, - "grad_norm": 2.0311970710754395, - "learning_rate": 3.685473477046807e-06, - "loss": 1.0244, - "step": 2858 - }, - { - "epoch": 2.163859981078524, - "grad_norm": 2.2926740646362305, - "learning_rate": 3.679251707326123e-06, - "loss": 1.0813, - "step": 2859 - }, - { - "epoch": 2.164616840113529, - "grad_norm": 2.1094629764556885, - "learning_rate": 3.6730340125079804e-06, - "loss": 1.0729, - "step": 2860 - }, - { - "epoch": 2.1653736991485335, - "grad_norm": 2.2575571537017822, - "learning_rate": 3.6668203966002157e-06, - "loss": 1.1031, - "step": 2861 - }, - { - "epoch": 2.1661305581835384, - "grad_norm": 2.2293882369995117, - "learning_rate": 3.660610863608018e-06, - "loss": 1.0676, - "step": 2862 - }, - { - "epoch": 2.1668874172185433, - "grad_norm": 2.214388132095337, - "learning_rate": 3.6544054175339655e-06, - "loss": 1.1499, - "step": 2863 - }, - { - "epoch": 2.1676442762535477, - "grad_norm": 1.9468921422958374, - "learning_rate": 3.6482040623779925e-06, - "loss": 1.0726, - "step": 2864 - }, - { - "epoch": 2.1684011352885526, - "grad_norm": 2.0682532787323, - "learning_rate": 3.642006802137399e-06, - "loss": 1.0695, - "step": 2865 - }, - { - "epoch": 2.169157994323557, - "grad_norm": 2.1253714561462402, - "learning_rate": 3.6358136408068475e-06, - "loss": 1.116, - "step": 2866 - }, - { - "epoch": 2.169914853358562, - "grad_norm": 2.113579511642456, - "learning_rate": 3.6296245823783514e-06, - "loss": 1.0874, - "step": 2867 - }, - { - "epoch": 2.170671712393567, - "grad_norm": 1.9568238258361816, - "learning_rate": 3.623439630841282e-06, - "loss": 1.0363, - "step": 2868 - }, - { - "epoch": 2.1714285714285713, - "grad_norm": 2.3202977180480957, - "learning_rate": 3.6172587901823652e-06, - "loss": 1.0881, - "step": 2869 - }, - { - "epoch": 2.172185430463576, - "grad_norm": 2.232671022415161, - "learning_rate": 3.611082064385679e-06, - "loss": 1.0919, - "step": 2870 - }, - { - "epoch": 2.172942289498581, - "grad_norm": 1.9573837518692017, - "learning_rate": 3.6049094574326453e-06, - "loss": 1.017, - "step": 2871 - }, - { - "epoch": 2.1736991485335855, - "grad_norm": 
2.110637664794922, - "learning_rate": 3.598740973302036e-06, - "loss": 1.1066, - "step": 2872 - }, - { - "epoch": 2.1744560075685904, - "grad_norm": 2.0418527126312256, - "learning_rate": 3.592576615969956e-06, - "loss": 1.0607, - "step": 2873 - }, - { - "epoch": 2.1752128666035953, - "grad_norm": 2.177208662033081, - "learning_rate": 3.5864163894098624e-06, - "loss": 1.0724, - "step": 2874 - }, - { - "epoch": 2.1759697256385997, - "grad_norm": 2.2924139499664307, - "learning_rate": 3.580260297592535e-06, - "loss": 1.0593, - "step": 2875 - }, - { - "epoch": 2.1767265846736046, - "grad_norm": 2.0772855281829834, - "learning_rate": 3.574108344486102e-06, - "loss": 1.0661, - "step": 2876 - }, - { - "epoch": 2.1774834437086095, - "grad_norm": 2.265467405319214, - "learning_rate": 3.5679605340560187e-06, - "loss": 1.0569, - "step": 2877 - }, - { - "epoch": 2.178240302743614, - "grad_norm": 2.154500722885132, - "learning_rate": 3.5618168702650713e-06, - "loss": 1.0332, - "step": 2878 - }, - { - "epoch": 2.178997161778619, - "grad_norm": 2.0559258460998535, - "learning_rate": 3.5556773570733666e-06, - "loss": 1.0697, - "step": 2879 - }, - { - "epoch": 2.1797540208136237, - "grad_norm": 2.4780728816986084, - "learning_rate": 3.5495419984383452e-06, - "loss": 1.0525, - "step": 2880 - }, - { - "epoch": 2.180510879848628, - "grad_norm": 2.0388307571411133, - "learning_rate": 3.543410798314767e-06, - "loss": 1.0224, - "step": 2881 - }, - { - "epoch": 2.181267738883633, - "grad_norm": 2.0700438022613525, - "learning_rate": 3.5372837606547056e-06, - "loss": 1.0795, - "step": 2882 - }, - { - "epoch": 2.1820245979186375, - "grad_norm": 2.08799409866333, - "learning_rate": 3.5311608894075606e-06, - "loss": 1.1147, - "step": 2883 - }, - { - "epoch": 2.1827814569536423, - "grad_norm": 2.04353928565979, - "learning_rate": 3.5250421885200357e-06, - "loss": 1.0693, - "step": 2884 - }, - { - "epoch": 2.1835383159886472, - "grad_norm": 2.1684114933013916, - "learning_rate": 3.5189276619361567e-06, - "loss": 1.0844, - "step": 2885 - }, - { - "epoch": 2.1842951750236517, - "grad_norm": 2.2811787128448486, - "learning_rate": 3.5128173135972515e-06, - "loss": 1.0921, - "step": 2886 - }, - { - "epoch": 2.1850520340586566, - "grad_norm": 2.293611764907837, - "learning_rate": 3.5067111474419603e-06, - "loss": 1.1276, - "step": 2887 - }, - { - "epoch": 2.1858088930936614, - "grad_norm": 1.9369990825653076, - "learning_rate": 3.5006091674062263e-06, - "loss": 1.0811, - "step": 2888 - }, - { - "epoch": 2.186565752128666, - "grad_norm": 2.1612861156463623, - "learning_rate": 3.494511377423291e-06, - "loss": 1.0987, - "step": 2889 - }, - { - "epoch": 2.1873226111636708, - "grad_norm": 2.301436424255371, - "learning_rate": 3.488417781423691e-06, - "loss": 1.1224, - "step": 2890 - }, - { - "epoch": 2.1880794701986757, - "grad_norm": 2.149083375930786, - "learning_rate": 3.482328383335271e-06, - "loss": 1.0906, - "step": 2891 - }, - { - "epoch": 2.18883632923368, - "grad_norm": 2.4687178134918213, - "learning_rate": 3.4762431870831625e-06, - "loss": 1.1381, - "step": 2892 - }, - { - "epoch": 2.189593188268685, - "grad_norm": 2.131269693374634, - "learning_rate": 3.4701621965897906e-06, - "loss": 1.0644, - "step": 2893 - }, - { - "epoch": 2.19035004730369, - "grad_norm": 2.0854032039642334, - "learning_rate": 3.464085415774874e-06, - "loss": 1.0703, - "step": 2894 - }, - { - "epoch": 2.1911069063386943, - "grad_norm": 1.988800287246704, - "learning_rate": 3.458012848555407e-06, - "loss": 1.0925, - "step": 2895 - }, - { - 
"epoch": 2.191863765373699, - "grad_norm": 2.0683155059814453, - "learning_rate": 3.451944498845673e-06, - "loss": 1.1212, - "step": 2896 - }, - { - "epoch": 2.192620624408704, - "grad_norm": 2.1208488941192627, - "learning_rate": 3.4458803705572385e-06, - "loss": 1.0917, - "step": 2897 - }, - { - "epoch": 2.1933774834437085, - "grad_norm": 1.9864528179168701, - "learning_rate": 3.4398204675989504e-06, - "loss": 1.1095, - "step": 2898 - }, - { - "epoch": 2.1941343424787134, - "grad_norm": 2.0708682537078857, - "learning_rate": 3.4337647938769283e-06, - "loss": 1.0989, - "step": 2899 - }, - { - "epoch": 2.194891201513718, - "grad_norm": 2.229597330093384, - "learning_rate": 3.4277133532945704e-06, - "loss": 1.1137, - "step": 2900 - }, - { - "epoch": 2.1956480605487227, - "grad_norm": 2.039870500564575, - "learning_rate": 3.4216661497525372e-06, - "loss": 1.0866, - "step": 2901 - }, - { - "epoch": 2.1964049195837276, - "grad_norm": 2.037367582321167, - "learning_rate": 3.4156231871487706e-06, - "loss": 1.0947, - "step": 2902 - }, - { - "epoch": 2.197161778618732, - "grad_norm": 2.3312087059020996, - "learning_rate": 3.4095844693784647e-06, - "loss": 1.0883, - "step": 2903 - }, - { - "epoch": 2.197918637653737, - "grad_norm": 2.1165080070495605, - "learning_rate": 3.4035500003340886e-06, - "loss": 1.07, - "step": 2904 - }, - { - "epoch": 2.198675496688742, - "grad_norm": 2.1637613773345947, - "learning_rate": 3.3975197839053727e-06, - "loss": 1.0012, - "step": 2905 - }, - { - "epoch": 2.1994323557237463, - "grad_norm": 2.1280291080474854, - "learning_rate": 3.3914938239792956e-06, - "loss": 1.0525, - "step": 2906 - }, - { - "epoch": 2.200189214758751, - "grad_norm": 2.1883440017700195, - "learning_rate": 3.385472124440102e-06, - "loss": 1.0466, - "step": 2907 - }, - { - "epoch": 2.200946073793756, - "grad_norm": 2.120882272720337, - "learning_rate": 3.3794546891692883e-06, - "loss": 1.0268, - "step": 2908 - }, - { - "epoch": 2.2017029328287605, - "grad_norm": 2.141380786895752, - "learning_rate": 3.3734415220456036e-06, - "loss": 1.0695, - "step": 2909 - }, - { - "epoch": 2.2024597918637654, - "grad_norm": 1.9636356830596924, - "learning_rate": 3.3674326269450386e-06, - "loss": 1.035, - "step": 2910 - }, - { - "epoch": 2.2032166508987703, - "grad_norm": 2.227339506149292, - "learning_rate": 3.361428007740842e-06, - "loss": 1.1143, - "step": 2911 - }, - { - "epoch": 2.2039735099337747, - "grad_norm": 2.206693172454834, - "learning_rate": 3.3554276683034933e-06, - "loss": 1.075, - "step": 2912 - }, - { - "epoch": 2.2047303689687796, - "grad_norm": 2.3205721378326416, - "learning_rate": 3.349431612500721e-06, - "loss": 1.0599, - "step": 2913 - }, - { - "epoch": 2.2054872280037845, - "grad_norm": 2.0222678184509277, - "learning_rate": 3.343439844197493e-06, - "loss": 1.083, - "step": 2914 - }, - { - "epoch": 2.206244087038789, - "grad_norm": 2.076840400695801, - "learning_rate": 3.337452367256012e-06, - "loss": 1.0306, - "step": 2915 - }, - { - "epoch": 2.207000946073794, - "grad_norm": 1.9618786573410034, - "learning_rate": 3.3314691855357197e-06, - "loss": 1.1021, - "step": 2916 - }, - { - "epoch": 2.2077578051087983, - "grad_norm": 2.168519973754883, - "learning_rate": 3.3254903028932716e-06, - "loss": 1.007, - "step": 2917 - }, - { - "epoch": 2.208514664143803, - "grad_norm": 2.0127992630004883, - "learning_rate": 3.3195157231825704e-06, - "loss": 1.0797, - "step": 2918 - }, - { - "epoch": 2.209271523178808, - "grad_norm": 2.0020880699157715, - "learning_rate": 3.3135454502547397e-06, - 
"loss": 1.1154, - "step": 2919 - }, - { - "epoch": 2.2100283822138125, - "grad_norm": 1.9836198091506958, - "learning_rate": 3.307579487958125e-06, - "loss": 1.0418, - "step": 2920 - }, - { - "epoch": 2.2107852412488174, - "grad_norm": 1.9691238403320312, - "learning_rate": 3.3016178401382957e-06, - "loss": 1.1094, - "step": 2921 - }, - { - "epoch": 2.2115421002838223, - "grad_norm": 2.1438305377960205, - "learning_rate": 3.2956605106380464e-06, - "loss": 1.0935, - "step": 2922 - }, - { - "epoch": 2.2122989593188267, - "grad_norm": 2.1357624530792236, - "learning_rate": 3.2897075032973656e-06, - "loss": 1.1033, - "step": 2923 - }, - { - "epoch": 2.2130558183538316, - "grad_norm": 2.027420997619629, - "learning_rate": 3.28375882195348e-06, - "loss": 1.0502, - "step": 2924 - }, - { - "epoch": 2.2138126773888365, - "grad_norm": 2.073096513748169, - "learning_rate": 3.2778144704408167e-06, - "loss": 1.0565, - "step": 2925 - }, - { - "epoch": 2.214569536423841, - "grad_norm": 2.12164306640625, - "learning_rate": 3.271874452591015e-06, - "loss": 1.103, - "step": 2926 - }, - { - "epoch": 2.215326395458846, - "grad_norm": 2.0933268070220947, - "learning_rate": 3.2659387722329226e-06, - "loss": 1.0776, - "step": 2927 - }, - { - "epoch": 2.2160832544938507, - "grad_norm": 2.033733367919922, - "learning_rate": 3.2600074331925834e-06, - "loss": 1.0642, - "step": 2928 - }, - { - "epoch": 2.216840113528855, - "grad_norm": 1.951857328414917, - "learning_rate": 3.2540804392932527e-06, - "loss": 1.0956, - "step": 2929 - }, - { - "epoch": 2.21759697256386, - "grad_norm": 2.0612125396728516, - "learning_rate": 3.2481577943553766e-06, - "loss": 1.0756, - "step": 2930 - }, - { - "epoch": 2.218353831598865, - "grad_norm": 1.9757081270217896, - "learning_rate": 3.2422395021966006e-06, - "loss": 1.0937, - "step": 2931 - }, - { - "epoch": 2.2191106906338693, - "grad_norm": 1.9480013847351074, - "learning_rate": 3.2363255666317706e-06, - "loss": 1.0986, - "step": 2932 - }, - { - "epoch": 2.2198675496688742, - "grad_norm": 2.087038040161133, - "learning_rate": 3.2304159914729194e-06, - "loss": 1.0518, - "step": 2933 - }, - { - "epoch": 2.2206244087038787, - "grad_norm": 2.119804859161377, - "learning_rate": 3.2245107805292625e-06, - "loss": 1.0356, - "step": 2934 - }, - { - "epoch": 2.2213812677388836, - "grad_norm": 2.2135863304138184, - "learning_rate": 3.2186099376072133e-06, - "loss": 1.0988, - "step": 2935 - }, - { - "epoch": 2.2221381267738884, - "grad_norm": 1.9379045963287354, - "learning_rate": 3.2127134665103684e-06, - "loss": 1.0762, - "step": 2936 - }, - { - "epoch": 2.222894985808893, - "grad_norm": 2.078213691711426, - "learning_rate": 3.206821371039495e-06, - "loss": 1.1031, - "step": 2937 - }, - { - "epoch": 2.2236518448438978, - "grad_norm": 1.7914735078811646, - "learning_rate": 3.2009336549925558e-06, - "loss": 1.0699, - "step": 2938 - }, - { - "epoch": 2.2244087038789027, - "grad_norm": 2.0808887481689453, - "learning_rate": 3.195050322164676e-06, - "loss": 1.0362, - "step": 2939 - }, - { - "epoch": 2.225165562913907, - "grad_norm": 2.2079849243164062, - "learning_rate": 3.1891713763481664e-06, - "loss": 1.0544, - "step": 2940 - }, - { - "epoch": 2.225922421948912, - "grad_norm": 2.137425184249878, - "learning_rate": 3.1832968213325056e-06, - "loss": 1.0465, - "step": 2941 - }, - { - "epoch": 2.226679280983917, - "grad_norm": 2.0021207332611084, - "learning_rate": 3.177426660904339e-06, - "loss": 1.0756, - "step": 2942 - }, - { - "epoch": 2.2274361400189213, - "grad_norm": 2.105543851852417, - 
"learning_rate": 3.1715608988474904e-06, - "loss": 1.0771, - "step": 2943 - }, - { - "epoch": 2.228192999053926, - "grad_norm": 1.925067663192749, - "learning_rate": 3.1656995389429347e-06, - "loss": 1.0919, - "step": 2944 - }, - { - "epoch": 2.228949858088931, - "grad_norm": 2.076474905014038, - "learning_rate": 3.159842584968813e-06, - "loss": 1.0684, - "step": 2945 - }, - { - "epoch": 2.2297067171239355, - "grad_norm": 2.0177693367004395, - "learning_rate": 3.15399004070043e-06, - "loss": 1.1303, - "step": 2946 - }, - { - "epoch": 2.2304635761589404, - "grad_norm": 1.9716448783874512, - "learning_rate": 3.1481419099102477e-06, - "loss": 1.0174, - "step": 2947 - }, - { - "epoch": 2.2312204351939453, - "grad_norm": 2.0462799072265625, - "learning_rate": 3.1422981963678823e-06, - "loss": 1.0466, - "step": 2948 - }, - { - "epoch": 2.2319772942289497, - "grad_norm": 2.242594003677368, - "learning_rate": 3.1364589038401055e-06, - "loss": 1.0786, - "step": 2949 - }, - { - "epoch": 2.2327341532639546, - "grad_norm": 2.011847972869873, - "learning_rate": 3.1306240360908325e-06, - "loss": 1.0735, - "step": 2950 - }, - { - "epoch": 2.2334910122989595, - "grad_norm": 1.8826643228530884, - "learning_rate": 3.124793596881128e-06, - "loss": 1.105, - "step": 2951 - }, - { - "epoch": 2.234247871333964, - "grad_norm": 2.173900604248047, - "learning_rate": 3.118967589969205e-06, - "loss": 1.0665, - "step": 2952 - }, - { - "epoch": 2.235004730368969, - "grad_norm": 2.065894365310669, - "learning_rate": 3.1131460191104214e-06, - "loss": 1.1092, - "step": 2953 - }, - { - "epoch": 2.2357615894039737, - "grad_norm": 2.1508965492248535, - "learning_rate": 3.107328888057271e-06, - "loss": 1.0692, - "step": 2954 - }, - { - "epoch": 2.236518448438978, - "grad_norm": 2.2163479328155518, - "learning_rate": 3.1015162005593918e-06, - "loss": 1.1466, - "step": 2955 - }, - { - "epoch": 2.237275307473983, - "grad_norm": 2.0771398544311523, - "learning_rate": 3.095707960363548e-06, - "loss": 1.1392, - "step": 2956 - }, - { - "epoch": 2.2380321665089875, - "grad_norm": 2.0793957710266113, - "learning_rate": 3.0899041712136474e-06, - "loss": 1.056, - "step": 2957 - }, - { - "epoch": 2.2387890255439924, - "grad_norm": 2.296447277069092, - "learning_rate": 3.084104836850719e-06, - "loss": 1.0637, - "step": 2958 - }, - { - "epoch": 2.2395458845789973, - "grad_norm": 2.2935092449188232, - "learning_rate": 3.0783099610129273e-06, - "loss": 1.0821, - "step": 2959 - }, - { - "epoch": 2.2403027436140017, - "grad_norm": 2.17787766456604, - "learning_rate": 3.0725195474355648e-06, - "loss": 1.0933, - "step": 2960 - }, - { - "epoch": 2.2410596026490066, - "grad_norm": 2.0003387928009033, - "learning_rate": 3.066733599851038e-06, - "loss": 1.0357, - "step": 2961 - }, - { - "epoch": 2.2418164616840115, - "grad_norm": 2.4547882080078125, - "learning_rate": 3.060952121988881e-06, - "loss": 1.1288, - "step": 2962 - }, - { - "epoch": 2.242573320719016, - "grad_norm": 2.319946765899658, - "learning_rate": 3.055175117575754e-06, - "loss": 1.09, - "step": 2963 - }, - { - "epoch": 2.243330179754021, - "grad_norm": 2.2627015113830566, - "learning_rate": 3.049402590335415e-06, - "loss": 1.1238, - "step": 2964 - }, - { - "epoch": 2.2440870387890257, - "grad_norm": 2.032540798187256, - "learning_rate": 3.043634543988752e-06, - "loss": 1.0622, - "step": 2965 - }, - { - "epoch": 2.24484389782403, - "grad_norm": 2.041095495223999, - "learning_rate": 3.037870982253763e-06, - "loss": 1.0729, - "step": 2966 - }, - { - "epoch": 2.245600756859035, - 
"grad_norm": 2.079834222793579, - "learning_rate": 3.032111908845547e-06, - "loss": 1.0541, - "step": 2967 - }, - { - "epoch": 2.24635761589404, - "grad_norm": 1.9280726909637451, - "learning_rate": 3.0263573274763165e-06, - "loss": 1.0715, - "step": 2968 - }, - { - "epoch": 2.2471144749290444, - "grad_norm": 1.9036996364593506, - "learning_rate": 3.0206072418553854e-06, - "loss": 1.0914, - "step": 2969 - }, - { - "epoch": 2.2478713339640493, - "grad_norm": 1.9973169565200806, - "learning_rate": 3.0148616556891774e-06, - "loss": 1.0612, - "step": 2970 - }, - { - "epoch": 2.248628192999054, - "grad_norm": 2.048168420791626, - "learning_rate": 3.009120572681206e-06, - "loss": 1.0573, - "step": 2971 - }, - { - "epoch": 2.2493850520340586, - "grad_norm": 1.9385312795639038, - "learning_rate": 3.0033839965320797e-06, - "loss": 1.0503, - "step": 2972 - }, - { - "epoch": 2.2501419110690635, - "grad_norm": 1.8759933710098267, - "learning_rate": 2.9976519309395154e-06, - "loss": 1.0739, - "step": 2973 - }, - { - "epoch": 2.250898770104068, - "grad_norm": 2.2850966453552246, - "learning_rate": 2.9919243795983116e-06, - "loss": 1.0669, - "step": 2974 - }, - { - "epoch": 2.251655629139073, - "grad_norm": 2.017787218093872, - "learning_rate": 2.9862013462003634e-06, - "loss": 1.0993, - "step": 2975 - }, - { - "epoch": 2.2524124881740777, - "grad_norm": 1.9540081024169922, - "learning_rate": 2.980482834434648e-06, - "loss": 1.116, - "step": 2976 - }, - { - "epoch": 2.253169347209082, - "grad_norm": 2.056605577468872, - "learning_rate": 2.974768847987239e-06, - "loss": 1.0612, - "step": 2977 - }, - { - "epoch": 2.253926206244087, - "grad_norm": 2.3890209197998047, - "learning_rate": 2.969059390541273e-06, - "loss": 1.0817, - "step": 2978 - }, - { - "epoch": 2.254683065279092, - "grad_norm": 1.947478175163269, - "learning_rate": 2.963354465776983e-06, - "loss": 1.0236, - "step": 2979 - }, - { - "epoch": 2.2554399243140963, - "grad_norm": 2.1459438800811768, - "learning_rate": 2.9576540773716783e-06, - "loss": 1.0725, - "step": 2980 - }, - { - "epoch": 2.2561967833491012, - "grad_norm": 2.0048837661743164, - "learning_rate": 2.9519582289997423e-06, - "loss": 1.1, - "step": 2981 - }, - { - "epoch": 2.256953642384106, - "grad_norm": 2.1805686950683594, - "learning_rate": 2.9462669243326357e-06, - "loss": 1.0963, - "step": 2982 - }, - { - "epoch": 2.2577105014191106, - "grad_norm": 1.9871695041656494, - "learning_rate": 2.9405801670388784e-06, - "loss": 1.1184, - "step": 2983 - }, - { - "epoch": 2.2584673604541154, - "grad_norm": 2.142199754714966, - "learning_rate": 2.934897960784075e-06, - "loss": 1.0546, - "step": 2984 - }, - { - "epoch": 2.2592242194891203, - "grad_norm": 2.1177968978881836, - "learning_rate": 2.9292203092308823e-06, - "loss": 1.1136, - "step": 2985 - }, - { - "epoch": 2.2599810785241248, - "grad_norm": 2.1006743907928467, - "learning_rate": 2.9235472160390315e-06, - "loss": 1.0091, - "step": 2986 - }, - { - "epoch": 2.2607379375591297, - "grad_norm": 1.978402853012085, - "learning_rate": 2.917878684865312e-06, - "loss": 1.0677, - "step": 2987 - }, - { - "epoch": 2.2614947965941345, - "grad_norm": 2.2147839069366455, - "learning_rate": 2.9122147193635757e-06, - "loss": 1.0809, - "step": 2988 - }, - { - "epoch": 2.262251655629139, - "grad_norm": 2.2743515968322754, - "learning_rate": 2.9065553231847215e-06, - "loss": 1.0647, - "step": 2989 - }, - { - "epoch": 2.263008514664144, - "grad_norm": 2.038224935531616, - "learning_rate": 2.900900499976714e-06, - "loss": 1.0953, - "step": 2990 - }, 
- { - "epoch": 2.2637653736991483, - "grad_norm": 1.9364351034164429, - "learning_rate": 2.895250253384567e-06, - "loss": 1.0241, - "step": 2991 - }, - { - "epoch": 2.264522232734153, - "grad_norm": 2.0556015968322754, - "learning_rate": 2.8896045870503405e-06, - "loss": 1.0358, - "step": 2992 - }, - { - "epoch": 2.265279091769158, - "grad_norm": 2.6211061477661133, - "learning_rate": 2.8839635046131477e-06, - "loss": 1.058, - "step": 2993 - }, - { - "epoch": 2.2660359508041625, - "grad_norm": 2.0403685569763184, - "learning_rate": 2.87832700970914e-06, - "loss": 1.0957, - "step": 2994 - }, - { - "epoch": 2.2667928098391674, - "grad_norm": 2.106076955795288, - "learning_rate": 2.8726951059715184e-06, - "loss": 1.0622, - "step": 2995 - }, - { - "epoch": 2.2675496688741723, - "grad_norm": 1.878516674041748, - "learning_rate": 2.867067797030522e-06, - "loss": 1.0636, - "step": 2996 - }, - { - "epoch": 2.2683065279091768, - "grad_norm": 2.178928852081299, - "learning_rate": 2.861445086513431e-06, - "loss": 1.0347, - "step": 2997 - }, - { - "epoch": 2.2690633869441816, - "grad_norm": 2.5624477863311768, - "learning_rate": 2.855826978044558e-06, - "loss": 1.0171, - "step": 2998 - }, - { - "epoch": 2.2698202459791865, - "grad_norm": 1.9493463039398193, - "learning_rate": 2.8502134752452488e-06, - "loss": 1.0763, - "step": 2999 - }, - { - "epoch": 2.270577105014191, - "grad_norm": 1.9162508249282837, - "learning_rate": 2.844604581733879e-06, - "loss": 1.1071, - "step": 3000 - }, - { - "epoch": 2.271333964049196, - "grad_norm": 2.097134828567505, - "learning_rate": 2.8390003011258576e-06, - "loss": 1.0987, - "step": 3001 - }, - { - "epoch": 2.2720908230842007, - "grad_norm": 2.138456106185913, - "learning_rate": 2.83340063703362e-06, - "loss": 1.0962, - "step": 3002 - }, - { - "epoch": 2.272847682119205, - "grad_norm": 2.057185411453247, - "learning_rate": 2.8278055930666243e-06, - "loss": 1.0849, - "step": 3003 - }, - { - "epoch": 2.27360454115421, - "grad_norm": 2.094721555709839, - "learning_rate": 2.822215172831354e-06, - "loss": 1.1004, - "step": 3004 - }, - { - "epoch": 2.274361400189215, - "grad_norm": 2.1358296871185303, - "learning_rate": 2.8166293799312994e-06, - "loss": 1.0583, - "step": 3005 - }, - { - "epoch": 2.2751182592242194, - "grad_norm": 1.955474615097046, - "learning_rate": 2.8110482179669823e-06, - "loss": 1.0828, - "step": 3006 - }, - { - "epoch": 2.2758751182592243, - "grad_norm": 2.084411144256592, - "learning_rate": 2.805471690535935e-06, - "loss": 1.0635, - "step": 3007 - }, - { - "epoch": 2.2766319772942287, - "grad_norm": 2.111748218536377, - "learning_rate": 2.799899801232702e-06, - "loss": 1.0604, - "step": 3008 - }, - { - "epoch": 2.2773888363292336, - "grad_norm": 2.2352616786956787, - "learning_rate": 2.7943325536488373e-06, - "loss": 1.1397, - "step": 3009 - }, - { - "epoch": 2.2781456953642385, - "grad_norm": 2.1407878398895264, - "learning_rate": 2.788769951372908e-06, - "loss": 1.083, - "step": 3010 - }, - { - "epoch": 2.2789025543992434, - "grad_norm": 2.0809216499328613, - "learning_rate": 2.7832119979904798e-06, - "loss": 1.0496, - "step": 3011 - }, - { - "epoch": 2.279659413434248, - "grad_norm": 2.0093045234680176, - "learning_rate": 2.77765869708412e-06, - "loss": 1.0543, - "step": 3012 - }, - { - "epoch": 2.2804162724692527, - "grad_norm": 1.9954379796981812, - "learning_rate": 2.7721100522334056e-06, - "loss": 1.1095, - "step": 3013 - }, - { - "epoch": 2.281173131504257, - "grad_norm": 2.1180033683776855, - "learning_rate": 2.7665660670149092e-06, - 
"loss": 1.0982, - "step": 3014 - }, - { - "epoch": 2.281929990539262, - "grad_norm": 2.0234453678131104, - "learning_rate": 2.761026745002201e-06, - "loss": 1.0808, - "step": 3015 - }, - { - "epoch": 2.282686849574267, - "grad_norm": 2.1585819721221924, - "learning_rate": 2.7554920897658386e-06, - "loss": 1.0775, - "step": 3016 - }, - { - "epoch": 2.2834437086092714, - "grad_norm": 1.9864180088043213, - "learning_rate": 2.7499621048733775e-06, - "loss": 1.1177, - "step": 3017 - }, - { - "epoch": 2.2842005676442763, - "grad_norm": 2.359938621520996, - "learning_rate": 2.744436793889368e-06, - "loss": 1.0951, - "step": 3018 - }, - { - "epoch": 2.284957426679281, - "grad_norm": 2.0253729820251465, - "learning_rate": 2.7389161603753312e-06, - "loss": 1.0414, - "step": 3019 - }, - { - "epoch": 2.2857142857142856, - "grad_norm": 1.9887871742248535, - "learning_rate": 2.73340020788979e-06, - "loss": 1.0882, - "step": 3020 - }, - { - "epoch": 2.2864711447492905, - "grad_norm": 1.971255898475647, - "learning_rate": 2.7278889399882435e-06, - "loss": 1.0867, - "step": 3021 - }, - { - "epoch": 2.2872280037842954, - "grad_norm": 2.0510776042938232, - "learning_rate": 2.7223823602231664e-06, - "loss": 1.0738, - "step": 3022 - }, - { - "epoch": 2.2879848628193, - "grad_norm": 1.953727126121521, - "learning_rate": 2.7168804721440177e-06, - "loss": 1.0262, - "step": 3023 - }, - { - "epoch": 2.2887417218543047, - "grad_norm": 2.0659303665161133, - "learning_rate": 2.7113832792972323e-06, - "loss": 1.0748, - "step": 3024 - }, - { - "epoch": 2.289498580889309, - "grad_norm": 2.147465229034424, - "learning_rate": 2.705890785226219e-06, - "loss": 1.0691, - "step": 3025 - }, - { - "epoch": 2.290255439924314, - "grad_norm": 2.1003715991973877, - "learning_rate": 2.7004029934713516e-06, - "loss": 1.0623, - "step": 3026 - }, - { - "epoch": 2.291012298959319, - "grad_norm": 1.9521448612213135, - "learning_rate": 2.6949199075699754e-06, - "loss": 1.1025, - "step": 3027 - }, - { - "epoch": 2.291769157994324, - "grad_norm": 1.998204231262207, - "learning_rate": 2.689441531056408e-06, - "loss": 1.0875, - "step": 3028 - }, - { - "epoch": 2.2925260170293282, - "grad_norm": 1.98150634765625, - "learning_rate": 2.683967867461925e-06, - "loss": 1.1051, - "step": 3029 - }, - { - "epoch": 2.293282876064333, - "grad_norm": 2.0418317317962646, - "learning_rate": 2.678498920314767e-06, - "loss": 1.0871, - "step": 3030 - }, - { - "epoch": 2.2940397350993376, - "grad_norm": 2.075697898864746, - "learning_rate": 2.673034693140136e-06, - "loss": 1.1588, - "step": 3031 - }, - { - "epoch": 2.2947965941343424, - "grad_norm": 2.049619197845459, - "learning_rate": 2.6675751894601928e-06, - "loss": 1.1, - "step": 3032 - }, - { - "epoch": 2.2955534531693473, - "grad_norm": 1.9731786251068115, - "learning_rate": 2.6621204127940403e-06, - "loss": 1.0883, - "step": 3033 - }, - { - "epoch": 2.2963103122043518, - "grad_norm": 2.0121383666992188, - "learning_rate": 2.656670366657748e-06, - "loss": 1.0914, - "step": 3034 - }, - { - "epoch": 2.2970671712393567, - "grad_norm": 2.0904853343963623, - "learning_rate": 2.651225054564334e-06, - "loss": 1.0803, - "step": 3035 - }, - { - "epoch": 2.2978240302743616, - "grad_norm": 2.0923306941986084, - "learning_rate": 2.645784480023764e-06, - "loss": 1.0528, - "step": 3036 - }, - { - "epoch": 2.298580889309366, - "grad_norm": 2.1617391109466553, - "learning_rate": 2.6403486465429524e-06, - "loss": 1.1454, - "step": 3037 - }, - { - "epoch": 2.299337748344371, - "grad_norm": 1.9403904676437378, - 
"learning_rate": 2.634917557625747e-06, - "loss": 1.1051, - "step": 3038 - }, - { - "epoch": 2.3000946073793758, - "grad_norm": 2.3704395294189453, - "learning_rate": 2.629491216772951e-06, - "loss": 1.0736, - "step": 3039 - }, - { - "epoch": 2.30085146641438, - "grad_norm": 2.0632617473602295, - "learning_rate": 2.6240696274822976e-06, - "loss": 1.0948, - "step": 3040 - }, - { - "epoch": 2.301608325449385, - "grad_norm": 1.9306870698928833, - "learning_rate": 2.6186527932484595e-06, - "loss": 1.0978, - "step": 3041 - }, - { - "epoch": 2.3023651844843895, - "grad_norm": 2.1989099979400635, - "learning_rate": 2.61324071756305e-06, - "loss": 1.0513, - "step": 3042 - }, - { - "epoch": 2.3031220435193944, - "grad_norm": 2.053589105606079, - "learning_rate": 2.60783340391461e-06, - "loss": 1.0828, - "step": 3043 - }, - { - "epoch": 2.3038789025543993, - "grad_norm": 2.009385347366333, - "learning_rate": 2.602430855788607e-06, - "loss": 1.0859, - "step": 3044 - }, - { - "epoch": 2.304635761589404, - "grad_norm": 2.086993932723999, - "learning_rate": 2.597033076667443e-06, - "loss": 1.1311, - "step": 3045 - }, - { - "epoch": 2.3053926206244086, - "grad_norm": 2.1662371158599854, - "learning_rate": 2.5916400700304476e-06, - "loss": 1.0746, - "step": 3046 - }, - { - "epoch": 2.3061494796594135, - "grad_norm": 1.9955482482910156, - "learning_rate": 2.5862518393538662e-06, - "loss": 1.0841, - "step": 3047 - }, - { - "epoch": 2.306906338694418, - "grad_norm": 2.2083284854888916, - "learning_rate": 2.5808683881108743e-06, - "loss": 1.0738, - "step": 3048 - }, - { - "epoch": 2.307663197729423, - "grad_norm": 2.1207940578460693, - "learning_rate": 2.5754897197715566e-06, - "loss": 1.1198, - "step": 3049 - }, - { - "epoch": 2.3084200567644277, - "grad_norm": 2.125546455383301, - "learning_rate": 2.5701158378029245e-06, - "loss": 1.0487, - "step": 3050 - }, - { - "epoch": 2.309176915799432, - "grad_norm": 2.025674343109131, - "learning_rate": 2.564746745668899e-06, - "loss": 1.0782, - "step": 3051 - }, - { - "epoch": 2.309933774834437, - "grad_norm": 2.1108992099761963, - "learning_rate": 2.559382446830318e-06, - "loss": 1.1041, - "step": 3052 - }, - { - "epoch": 2.310690633869442, - "grad_norm": 2.149214267730713, - "learning_rate": 2.554022944744925e-06, - "loss": 1.0332, - "step": 3053 - }, - { - "epoch": 2.3114474929044464, - "grad_norm": 2.708857536315918, - "learning_rate": 2.5486682428673753e-06, - "loss": 1.0605, - "step": 3054 - }, - { - "epoch": 2.3122043519394513, - "grad_norm": 2.029184341430664, - "learning_rate": 2.5433183446492214e-06, - "loss": 1.1014, - "step": 3055 - }, - { - "epoch": 2.312961210974456, - "grad_norm": 1.9624137878417969, - "learning_rate": 2.537973253538931e-06, - "loss": 1.0924, - "step": 3056 - }, - { - "epoch": 2.3137180700094606, - "grad_norm": 2.015695571899414, - "learning_rate": 2.5326329729818673e-06, - "loss": 1.0847, - "step": 3057 - }, - { - "epoch": 2.3144749290444655, - "grad_norm": 2.0661072731018066, - "learning_rate": 2.5272975064202943e-06, - "loss": 1.0434, - "step": 3058 - }, - { - "epoch": 2.3152317880794704, - "grad_norm": 2.1128456592559814, - "learning_rate": 2.521966857293378e-06, - "loss": 1.0661, - "step": 3059 - }, - { - "epoch": 2.315988647114475, - "grad_norm": 2.1260507106781006, - "learning_rate": 2.5166410290371626e-06, - "loss": 1.084, - "step": 3060 - }, - { - "epoch": 2.3167455061494797, - "grad_norm": 2.091785430908203, - "learning_rate": 2.5113200250845996e-06, - "loss": 1.0772, - "step": 3061 - }, - { - "epoch": 2.3175023651844846, - 
"grad_norm": 2.1781039237976074, - "learning_rate": 2.5060038488655302e-06, - "loss": 1.0469, - "step": 3062 - }, - { - "epoch": 2.318259224219489, - "grad_norm": 2.150576114654541, - "learning_rate": 2.500692503806678e-06, - "loss": 1.1243, - "step": 3063 - }, - { - "epoch": 2.319016083254494, - "grad_norm": 2.021026372909546, - "learning_rate": 2.4953859933316555e-06, - "loss": 1.0894, - "step": 3064 - }, - { - "epoch": 2.3197729422894984, - "grad_norm": 2.0633111000061035, - "learning_rate": 2.490084320860961e-06, - "loss": 1.07, - "step": 3065 - }, - { - "epoch": 2.3205298013245033, - "grad_norm": 2.0326859951019287, - "learning_rate": 2.4847874898119706e-06, - "loss": 1.1148, - "step": 3066 - }, - { - "epoch": 2.321286660359508, - "grad_norm": 2.0705957412719727, - "learning_rate": 2.479495503598935e-06, - "loss": 1.056, - "step": 3067 - }, - { - "epoch": 2.3220435193945126, - "grad_norm": 2.0693790912628174, - "learning_rate": 2.474208365632993e-06, - "loss": 1.0678, - "step": 3068 - }, - { - "epoch": 2.3228003784295175, - "grad_norm": 2.152256965637207, - "learning_rate": 2.468926079322153e-06, - "loss": 1.0826, - "step": 3069 - }, - { - "epoch": 2.3235572374645224, - "grad_norm": 1.9246243238449097, - "learning_rate": 2.463648648071298e-06, - "loss": 1.0872, - "step": 3070 - }, - { - "epoch": 2.324314096499527, - "grad_norm": 2.0952563285827637, - "learning_rate": 2.4583760752821752e-06, - "loss": 1.0851, - "step": 3071 - }, - { - "epoch": 2.3250709555345317, - "grad_norm": 2.1515979766845703, - "learning_rate": 2.453108364353406e-06, - "loss": 1.0494, - "step": 3072 - }, - { - "epoch": 2.3258278145695366, - "grad_norm": 2.0528197288513184, - "learning_rate": 2.447845518680481e-06, - "loss": 1.0619, - "step": 3073 - }, - { - "epoch": 2.326584673604541, - "grad_norm": 2.0087759494781494, - "learning_rate": 2.4425875416557426e-06, - "loss": 1.0229, - "step": 3074 - }, - { - "epoch": 2.327341532639546, - "grad_norm": 2.0219340324401855, - "learning_rate": 2.437334436668407e-06, - "loss": 1.0621, - "step": 3075 - }, - { - "epoch": 2.328098391674551, - "grad_norm": 2.0388236045837402, - "learning_rate": 2.432086207104549e-06, - "loss": 1.111, - "step": 3076 - }, - { - "epoch": 2.3288552507095552, - "grad_norm": 2.142197370529175, - "learning_rate": 2.426842856347089e-06, - "loss": 1.0878, - "step": 3077 - }, - { - "epoch": 2.32961210974456, - "grad_norm": 1.8765891790390015, - "learning_rate": 2.4216043877758163e-06, - "loss": 1.1534, - "step": 3078 - }, - { - "epoch": 2.330368968779565, - "grad_norm": 2.0405936241149902, - "learning_rate": 2.416370804767367e-06, - "loss": 1.0497, - "step": 3079 - }, - { - "epoch": 2.3311258278145695, - "grad_norm": 1.989956259727478, - "learning_rate": 2.4111421106952317e-06, - "loss": 1.0716, - "step": 3080 - }, - { - "epoch": 2.3318826868495743, - "grad_norm": 2.097310781478882, - "learning_rate": 2.4059183089297432e-06, - "loss": 1.0932, - "step": 3081 - }, - { - "epoch": 2.332639545884579, - "grad_norm": 1.9613529443740845, - "learning_rate": 2.4006994028380835e-06, - "loss": 1.0853, - "step": 3082 - }, - { - "epoch": 2.3333964049195837, - "grad_norm": 2.1626081466674805, - "learning_rate": 2.3954853957842816e-06, - "loss": 1.1113, - "step": 3083 - }, - { - "epoch": 2.3341532639545886, - "grad_norm": 1.9847509860992432, - "learning_rate": 2.3902762911292063e-06, - "loss": 1.0632, - "step": 3084 - }, - { - "epoch": 2.334910122989593, - "grad_norm": 2.432072639465332, - "learning_rate": 2.385072092230568e-06, - "loss": 1.0981, - "step": 3085 - }, - { 
- "epoch": 2.335666982024598, - "grad_norm": 2.0696966648101807, - "learning_rate": 2.3798728024429136e-06, - "loss": 1.053, - "step": 3086 - }, - { - "epoch": 2.3364238410596028, - "grad_norm": 2.017995595932007, - "learning_rate": 2.374678425117631e-06, - "loss": 1.1, - "step": 3087 - }, - { - "epoch": 2.337180700094607, - "grad_norm": 2.056910753250122, - "learning_rate": 2.369488963602927e-06, - "loss": 1.1009, - "step": 3088 - }, - { - "epoch": 2.337937559129612, - "grad_norm": 2.1294288635253906, - "learning_rate": 2.3643044212438547e-06, - "loss": 1.1008, - "step": 3089 - }, - { - "epoch": 2.338694418164617, - "grad_norm": 1.9634032249450684, - "learning_rate": 2.3591248013822885e-06, - "loss": 1.0868, - "step": 3090 - }, - { - "epoch": 2.3394512771996214, - "grad_norm": 2.2118031978607178, - "learning_rate": 2.3539501073569357e-06, - "loss": 1.1081, - "step": 3091 - }, - { - "epoch": 2.3402081362346263, - "grad_norm": 2.3364391326904297, - "learning_rate": 2.348780342503326e-06, - "loss": 1.0373, - "step": 3092 - }, - { - "epoch": 2.340964995269631, - "grad_norm": 2.061373472213745, - "learning_rate": 2.343615510153806e-06, - "loss": 1.0809, - "step": 3093 - }, - { - "epoch": 2.3417218543046356, - "grad_norm": 1.9650219678878784, - "learning_rate": 2.338455613637553e-06, - "loss": 1.0659, - "step": 3094 - }, - { - "epoch": 2.3424787133396405, - "grad_norm": 2.1445631980895996, - "learning_rate": 2.333300656280552e-06, - "loss": 1.0708, - "step": 3095 - }, - { - "epoch": 2.3432355723746454, - "grad_norm": 2.209373950958252, - "learning_rate": 2.328150641405614e-06, - "loss": 1.0744, - "step": 3096 - }, - { - "epoch": 2.34399243140965, - "grad_norm": 2.036855936050415, - "learning_rate": 2.3230055723323587e-06, - "loss": 1.0878, - "step": 3097 - }, - { - "epoch": 2.3447492904446547, - "grad_norm": 2.2812464237213135, - "learning_rate": 2.317865452377222e-06, - "loss": 1.0321, - "step": 3098 - }, - { - "epoch": 2.345506149479659, - "grad_norm": 1.9373234510421753, - "learning_rate": 2.312730284853442e-06, - "loss": 1.1768, - "step": 3099 - }, - { - "epoch": 2.346263008514664, - "grad_norm": 1.9641289710998535, - "learning_rate": 2.3076000730710715e-06, - "loss": 1.0922, - "step": 3100 - }, - { - "epoch": 2.347019867549669, - "grad_norm": 2.2891197204589844, - "learning_rate": 2.3024748203369697e-06, - "loss": 1.0122, - "step": 3101 - }, - { - "epoch": 2.347776726584674, - "grad_norm": 2.042477607727051, - "learning_rate": 2.2973545299547907e-06, - "loss": 1.0209, - "step": 3102 - }, - { - "epoch": 2.3485335856196783, - "grad_norm": 2.082688331604004, - "learning_rate": 2.2922392052250012e-06, - "loss": 1.0879, - "step": 3103 - }, - { - "epoch": 2.349290444654683, - "grad_norm": 2.036217451095581, - "learning_rate": 2.287128849444857e-06, - "loss": 1.0736, - "step": 3104 - }, - { - "epoch": 2.3500473036896876, - "grad_norm": 2.099870443344116, - "learning_rate": 2.282023465908417e-06, - "loss": 1.0502, - "step": 3105 - }, - { - "epoch": 2.3508041627246925, - "grad_norm": 2.0198404788970947, - "learning_rate": 2.276923057906534e-06, - "loss": 1.1233, - "step": 3106 - }, - { - "epoch": 2.3515610217596974, - "grad_norm": 2.331169843673706, - "learning_rate": 2.271827628726853e-06, - "loss": 1.0535, - "step": 3107 - }, - { - "epoch": 2.352317880794702, - "grad_norm": 2.124520778656006, - "learning_rate": 2.2667371816538124e-06, - "loss": 1.0393, - "step": 3108 - }, - { - "epoch": 2.3530747398297067, - "grad_norm": 1.9916775226593018, - "learning_rate": 2.261651719968635e-06, - "loss": 
1.0811, - "step": 3109 - }, - { - "epoch": 2.3538315988647116, - "grad_norm": 2.134824514389038, - "learning_rate": 2.2565712469493285e-06, - "loss": 1.0755, - "step": 3110 - }, - { - "epoch": 2.354588457899716, - "grad_norm": 2.083906412124634, - "learning_rate": 2.251495765870691e-06, - "loss": 1.0626, - "step": 3111 - }, - { - "epoch": 2.355345316934721, - "grad_norm": 2.06776762008667, - "learning_rate": 2.246425280004301e-06, - "loss": 1.0667, - "step": 3112 - }, - { - "epoch": 2.356102175969726, - "grad_norm": 2.042806386947632, - "learning_rate": 2.241359792618514e-06, - "loss": 1.0589, - "step": 3113 - }, - { - "epoch": 2.3568590350047303, - "grad_norm": 2.0756242275238037, - "learning_rate": 2.2362993069784754e-06, - "loss": 1.0757, - "step": 3114 - }, - { - "epoch": 2.357615894039735, - "grad_norm": 2.315819501876831, - "learning_rate": 2.231243826346082e-06, - "loss": 1.1302, - "step": 3115 - }, - { - "epoch": 2.3583727530747396, - "grad_norm": 2.038795232772827, - "learning_rate": 2.2261933539800276e-06, - "loss": 0.9891, - "step": 3116 - }, - { - "epoch": 2.3591296121097445, - "grad_norm": 1.8599226474761963, - "learning_rate": 2.2211478931357686e-06, - "loss": 1.0786, - "step": 3117 - }, - { - "epoch": 2.3598864711447494, - "grad_norm": 2.074420928955078, - "learning_rate": 2.2161074470655327e-06, - "loss": 1.0555, - "step": 3118 - }, - { - "epoch": 2.3606433301797543, - "grad_norm": 2.0066890716552734, - "learning_rate": 2.2110720190183143e-06, - "loss": 1.077, - "step": 3119 - }, - { - "epoch": 2.3614001892147587, - "grad_norm": 2.137488603591919, - "learning_rate": 2.2060416122398754e-06, - "loss": 1.0421, - "step": 3120 - }, - { - "epoch": 2.3621570482497636, - "grad_norm": 2.0553901195526123, - "learning_rate": 2.2010162299727382e-06, - "loss": 1.0978, - "step": 3121 - }, - { - "epoch": 2.362913907284768, - "grad_norm": 1.9921813011169434, - "learning_rate": 2.1959958754561846e-06, - "loss": 1.0452, - "step": 3122 - }, - { - "epoch": 2.363670766319773, - "grad_norm": 2.0219523906707764, - "learning_rate": 2.1909805519262607e-06, - "loss": 1.0474, - "step": 3123 - }, - { - "epoch": 2.364427625354778, - "grad_norm": 1.9772000312805176, - "learning_rate": 2.185970262615767e-06, - "loss": 1.0281, - "step": 3124 - }, - { - "epoch": 2.3651844843897822, - "grad_norm": 2.09308123588562, - "learning_rate": 2.1809650107542632e-06, - "loss": 1.082, - "step": 3125 - }, - { - "epoch": 2.365941343424787, - "grad_norm": 1.9955263137817383, - "learning_rate": 2.175964799568052e-06, - "loss": 1.0324, - "step": 3126 - }, - { - "epoch": 2.366698202459792, - "grad_norm": 2.1914641857147217, - "learning_rate": 2.1709696322801972e-06, - "loss": 1.093, - "step": 3127 - }, - { - "epoch": 2.3674550614947965, - "grad_norm": 1.9175313711166382, - "learning_rate": 2.1659795121105097e-06, - "loss": 1.0671, - "step": 3128 - }, - { - "epoch": 2.3682119205298013, - "grad_norm": 2.153555154800415, - "learning_rate": 2.16099444227554e-06, - "loss": 1.0857, - "step": 3129 - }, - { - "epoch": 2.3689687795648062, - "grad_norm": 2.1872153282165527, - "learning_rate": 2.1560144259885886e-06, - "loss": 1.0465, - "step": 3130 - }, - { - "epoch": 2.3697256385998107, - "grad_norm": 2.0605931282043457, - "learning_rate": 2.151039466459703e-06, - "loss": 1.0579, - "step": 3131 - }, - { - "epoch": 2.3704824976348156, - "grad_norm": 2.1121630668640137, - "learning_rate": 2.1460695668956603e-06, - "loss": 1.0797, - "step": 3132 - }, - { - "epoch": 2.37123935666982, - "grad_norm": 1.927918553352356, - "learning_rate": 
2.1411047304999855e-06, - "loss": 1.065, - "step": 3133 - }, - { - "epoch": 2.371996215704825, - "grad_norm": 2.0213940143585205, - "learning_rate": 2.1361449604729334e-06, - "loss": 1.0243, - "step": 3134 - }, - { - "epoch": 2.3727530747398298, - "grad_norm": 2.0634241104125977, - "learning_rate": 2.1311902600115026e-06, - "loss": 1.0243, - "step": 3135 - }, - { - "epoch": 2.3735099337748347, - "grad_norm": 2.063898801803589, - "learning_rate": 2.126240632309412e-06, - "loss": 1.0804, - "step": 3136 - }, - { - "epoch": 2.374266792809839, - "grad_norm": 1.9965025186538696, - "learning_rate": 2.1212960805571153e-06, - "loss": 1.0819, - "step": 3137 - }, - { - "epoch": 2.375023651844844, - "grad_norm": 2.0790200233459473, - "learning_rate": 2.1163566079417965e-06, - "loss": 1.0549, - "step": 3138 - }, - { - "epoch": 2.3757805108798484, - "grad_norm": 2.2082910537719727, - "learning_rate": 2.1114222176473647e-06, - "loss": 1.041, - "step": 3139 - }, - { - "epoch": 2.3765373699148533, - "grad_norm": 2.1791975498199463, - "learning_rate": 2.1064929128544527e-06, - "loss": 1.0671, - "step": 3140 - }, - { - "epoch": 2.377294228949858, - "grad_norm": 2.011662006378174, - "learning_rate": 2.1015686967404155e-06, - "loss": 1.0451, - "step": 3141 - }, - { - "epoch": 2.3780510879848626, - "grad_norm": 2.092410087585449, - "learning_rate": 2.0966495724793328e-06, - "loss": 0.999, - "step": 3142 - }, - { - "epoch": 2.3788079470198675, - "grad_norm": 2.0836849212646484, - "learning_rate": 2.0917355432419856e-06, - "loss": 1.0693, - "step": 3143 - }, - { - "epoch": 2.3795648060548724, - "grad_norm": 2.1676831245422363, - "learning_rate": 2.0868266121958895e-06, - "loss": 1.0786, - "step": 3144 - }, - { - "epoch": 2.380321665089877, - "grad_norm": 1.94955313205719, - "learning_rate": 2.0819227825052655e-06, - "loss": 1.04, - "step": 3145 - }, - { - "epoch": 2.3810785241248817, - "grad_norm": 2.1713364124298096, - "learning_rate": 2.0770240573310464e-06, - "loss": 1.0997, - "step": 3146 - }, - { - "epoch": 2.3818353831598866, - "grad_norm": 1.9667205810546875, - "learning_rate": 2.07213043983088e-06, - "loss": 1.0526, - "step": 3147 - }, - { - "epoch": 2.382592242194891, - "grad_norm": 2.1327104568481445, - "learning_rate": 2.067241933159111e-06, - "loss": 1.0717, - "step": 3148 - }, - { - "epoch": 2.383349101229896, - "grad_norm": 2.0339972972869873, - "learning_rate": 2.0623585404668027e-06, - "loss": 1.0583, - "step": 3149 - }, - { - "epoch": 2.384105960264901, - "grad_norm": 2.172558069229126, - "learning_rate": 2.0574802649017087e-06, - "loss": 1.0814, - "step": 3150 - }, - { - "epoch": 2.3848628192999053, - "grad_norm": 2.1538596153259277, - "learning_rate": 2.0526071096082958e-06, - "loss": 1.0713, - "step": 3151 - }, - { - "epoch": 2.38561967833491, - "grad_norm": 2.009945869445801, - "learning_rate": 2.0477390777277238e-06, - "loss": 1.0783, - "step": 3152 - }, - { - "epoch": 2.386376537369915, - "grad_norm": 2.024836540222168, - "learning_rate": 2.042876172397855e-06, - "loss": 1.0648, - "step": 3153 - }, - { - "epoch": 2.3871333964049195, - "grad_norm": 1.9101126194000244, - "learning_rate": 2.0380183967532398e-06, - "loss": 1.0476, - "step": 3154 - }, - { - "epoch": 2.3878902554399244, - "grad_norm": 1.9727280139923096, - "learning_rate": 2.033165753925127e-06, - "loss": 1.0658, - "step": 3155 - }, - { - "epoch": 2.388647114474929, - "grad_norm": 2.0228683948516846, - "learning_rate": 2.0283182470414605e-06, - "loss": 1.0536, - "step": 3156 - }, - { - "epoch": 2.3894039735099337, - "grad_norm": 
2.0721933841705322, - "learning_rate": 2.0234758792268626e-06, - "loss": 1.1227, - "step": 3157 - }, - { - "epoch": 2.3901608325449386, - "grad_norm": 2.15034818649292, - "learning_rate": 2.0186386536026563e-06, - "loss": 1.1274, - "step": 3158 - }, - { - "epoch": 2.390917691579943, - "grad_norm": 2.1228785514831543, - "learning_rate": 2.0138065732868377e-06, - "loss": 1.1003, - "step": 3159 - }, - { - "epoch": 2.391674550614948, - "grad_norm": 1.9539825916290283, - "learning_rate": 2.008979641394094e-06, - "loss": 1.0618, - "step": 3160 - }, - { - "epoch": 2.392431409649953, - "grad_norm": 2.054403066635132, - "learning_rate": 2.0041578610357924e-06, - "loss": 1.0761, - "step": 3161 - }, - { - "epoch": 2.3931882686849573, - "grad_norm": 2.2176952362060547, - "learning_rate": 1.9993412353199797e-06, - "loss": 1.0955, - "step": 3162 - }, - { - "epoch": 2.393945127719962, - "grad_norm": 1.9848977327346802, - "learning_rate": 1.9945297673513813e-06, - "loss": 1.0478, - "step": 3163 - }, - { - "epoch": 2.394701986754967, - "grad_norm": 2.0067944526672363, - "learning_rate": 1.9897234602313935e-06, - "loss": 1.0606, - "step": 3164 - }, - { - "epoch": 2.3954588457899715, - "grad_norm": 2.1978671550750732, - "learning_rate": 1.9849223170580863e-06, - "loss": 1.0673, - "step": 3165 - }, - { - "epoch": 2.3962157048249764, - "grad_norm": 2.103545665740967, - "learning_rate": 1.9801263409262044e-06, - "loss": 1.0697, - "step": 3166 - }, - { - "epoch": 2.3969725638599813, - "grad_norm": 2.0121638774871826, - "learning_rate": 1.975335534927164e-06, - "loss": 1.1051, - "step": 3167 - }, - { - "epoch": 2.3977294228949857, - "grad_norm": 2.227635383605957, - "learning_rate": 1.970549902149043e-06, - "loss": 1.1104, - "step": 3168 - }, - { - "epoch": 2.3984862819299906, - "grad_norm": 1.98334538936615, - "learning_rate": 1.965769445676593e-06, - "loss": 1.0538, - "step": 3169 - }, - { - "epoch": 2.3992431409649955, - "grad_norm": 2.092841863632202, - "learning_rate": 1.9609941685912137e-06, - "loss": 1.076, - "step": 3170 - }, - { - "epoch": 2.4, - "grad_norm": 1.9714951515197754, - "learning_rate": 1.9562240739709797e-06, - "loss": 1.0408, - "step": 3171 - }, - { - "epoch": 2.400756859035005, - "grad_norm": 2.228931427001953, - "learning_rate": 1.951459164890623e-06, - "loss": 1.07, - "step": 3172 - }, - { - "epoch": 2.4015137180700092, - "grad_norm": 2.2674131393432617, - "learning_rate": 1.9466994444215306e-06, - "loss": 1.0301, - "step": 3173 - }, - { - "epoch": 2.402270577105014, - "grad_norm": 2.017943859100342, - "learning_rate": 1.941944915631745e-06, - "loss": 1.0159, - "step": 3174 - }, - { - "epoch": 2.403027436140019, - "grad_norm": 2.2940430641174316, - "learning_rate": 1.937195581585966e-06, - "loss": 1.0656, - "step": 3175 - }, - { - "epoch": 2.403784295175024, - "grad_norm": 2.035090684890747, - "learning_rate": 1.9324514453455404e-06, - "loss": 1.0572, - "step": 3176 - }, - { - "epoch": 2.4045411542100283, - "grad_norm": 2.0770013332366943, - "learning_rate": 1.927712509968461e-06, - "loss": 1.0763, - "step": 3177 - }, - { - "epoch": 2.4052980132450332, - "grad_norm": 2.1525466442108154, - "learning_rate": 1.9229787785093784e-06, - "loss": 1.069, - "step": 3178 - }, - { - "epoch": 2.4060548722800377, - "grad_norm": 1.9798002243041992, - "learning_rate": 1.9182502540195826e-06, - "loss": 1.04, - "step": 3179 - }, - { - "epoch": 2.4068117313150426, - "grad_norm": 2.0167088508605957, - "learning_rate": 1.9135269395470117e-06, - "loss": 1.0951, - "step": 3180 - }, - { - "epoch": 
2.4075685903500474, - "grad_norm": 1.9377844333648682, - "learning_rate": 1.908808838136235e-06, - "loss": 1.0678, - "step": 3181 - }, - { - "epoch": 2.408325449385052, - "grad_norm": 1.8350154161453247, - "learning_rate": 1.904095952828474e-06, - "loss": 1.0734, - "step": 3182 - }, - { - "epoch": 2.4090823084200568, - "grad_norm": 2.102295160293579, - "learning_rate": 1.8993882866615832e-06, - "loss": 1.0446, - "step": 3183 - }, - { - "epoch": 2.4098391674550617, - "grad_norm": 2.003739833831787, - "learning_rate": 1.8946858426700479e-06, - "loss": 1.1126, - "step": 3184 - }, - { - "epoch": 2.410596026490066, - "grad_norm": 2.410670280456543, - "learning_rate": 1.8899886238849949e-06, - "loss": 1.0803, - "step": 3185 - }, - { - "epoch": 2.411352885525071, - "grad_norm": 1.9924821853637695, - "learning_rate": 1.8852966333341822e-06, - "loss": 1.0713, - "step": 3186 - }, - { - "epoch": 2.412109744560076, - "grad_norm": 2.1071012020111084, - "learning_rate": 1.880609874041989e-06, - "loss": 1.0772, - "step": 3187 - }, - { - "epoch": 2.4128666035950803, - "grad_norm": 2.1172494888305664, - "learning_rate": 1.8759283490294333e-06, - "loss": 1.0584, - "step": 3188 - }, - { - "epoch": 2.413623462630085, - "grad_norm": 2.2310361862182617, - "learning_rate": 1.8712520613141525e-06, - "loss": 1.039, - "step": 3189 - }, - { - "epoch": 2.4143803216650896, - "grad_norm": 1.90047287940979, - "learning_rate": 1.8665810139104157e-06, - "loss": 1.0776, - "step": 3190 - }, - { - "epoch": 2.4151371807000945, - "grad_norm": 2.110424041748047, - "learning_rate": 1.8619152098291044e-06, - "loss": 1.0546, - "step": 3191 - }, - { - "epoch": 2.4158940397350994, - "grad_norm": 1.9257051944732666, - "learning_rate": 1.8572546520777214e-06, - "loss": 1.0843, - "step": 3192 - }, - { - "epoch": 2.4166508987701043, - "grad_norm": 2.1789603233337402, - "learning_rate": 1.852599343660396e-06, - "loss": 1.0479, - "step": 3193 - }, - { - "epoch": 2.4174077578051087, - "grad_norm": 1.8343688249588013, - "learning_rate": 1.847949287577868e-06, - "loss": 0.9984, - "step": 3194 - }, - { - "epoch": 2.4181646168401136, - "grad_norm": 2.1044609546661377, - "learning_rate": 1.843304486827492e-06, - "loss": 1.0603, - "step": 3195 - }, - { - "epoch": 2.418921475875118, - "grad_norm": 2.0383095741271973, - "learning_rate": 1.838664944403236e-06, - "loss": 1.0733, - "step": 3196 - }, - { - "epoch": 2.419678334910123, - "grad_norm": 1.9661799669265747, - "learning_rate": 1.8340306632956847e-06, - "loss": 1.0257, - "step": 3197 - }, - { - "epoch": 2.420435193945128, - "grad_norm": 2.1200876235961914, - "learning_rate": 1.8294016464920133e-06, - "loss": 1.0597, - "step": 3198 - }, - { - "epoch": 2.4211920529801323, - "grad_norm": 2.138803005218506, - "learning_rate": 1.8247778969760206e-06, - "loss": 1.0518, - "step": 3199 - }, - { - "epoch": 2.421948912015137, - "grad_norm": 1.937528371810913, - "learning_rate": 1.8201594177281053e-06, - "loss": 1.1165, - "step": 3200 - }, - { - "epoch": 2.422705771050142, - "grad_norm": 1.99111008644104, - "learning_rate": 1.8155462117252693e-06, - "loss": 1.0687, - "step": 3201 - }, - { - "epoch": 2.4234626300851465, - "grad_norm": 2.2547271251678467, - "learning_rate": 1.8109382819411164e-06, - "loss": 1.0613, - "step": 3202 - }, - { - "epoch": 2.4242194891201514, - "grad_norm": 1.8853436708450317, - "learning_rate": 1.8063356313458443e-06, - "loss": 1.1003, - "step": 3203 - }, - { - "epoch": 2.4249763481551563, - "grad_norm": 1.9870060682296753, - "learning_rate": 1.801738262906254e-06, - "loss": 
1.0924, - "step": 3204 - }, - { - "epoch": 2.4257332071901607, - "grad_norm": 1.9391242265701294, - "learning_rate": 1.7971461795857367e-06, - "loss": 1.0116, - "step": 3205 - }, - { - "epoch": 2.4264900662251656, - "grad_norm": 2.092609167098999, - "learning_rate": 1.7925593843442798e-06, - "loss": 1.1001, - "step": 3206 - }, - { - "epoch": 2.42724692526017, - "grad_norm": 2.029475212097168, - "learning_rate": 1.787977880138463e-06, - "loss": 1.0716, - "step": 3207 - }, - { - "epoch": 2.428003784295175, - "grad_norm": 2.054161787033081, - "learning_rate": 1.783401669921456e-06, - "loss": 1.0752, - "step": 3208 - }, - { - "epoch": 2.42876064333018, - "grad_norm": 1.762110710144043, - "learning_rate": 1.7788307566430083e-06, - "loss": 1.0639, - "step": 3209 - }, - { - "epoch": 2.4295175023651847, - "grad_norm": 2.044447898864746, - "learning_rate": 1.774265143249467e-06, - "loss": 1.0905, - "step": 3210 - }, - { - "epoch": 2.430274361400189, - "grad_norm": 2.024933338165283, - "learning_rate": 1.7697048326837516e-06, - "loss": 1.0152, - "step": 3211 - }, - { - "epoch": 2.431031220435194, - "grad_norm": 1.9569860696792603, - "learning_rate": 1.7651498278853708e-06, - "loss": 1.0603, - "step": 3212 - }, - { - "epoch": 2.4317880794701985, - "grad_norm": 2.0603296756744385, - "learning_rate": 1.760600131790414e-06, - "loss": 1.1086, - "step": 3213 - }, - { - "epoch": 2.4325449385052034, - "grad_norm": 1.8924018144607544, - "learning_rate": 1.7560557473315413e-06, - "loss": 1.0509, - "step": 3214 - }, - { - "epoch": 2.4333017975402083, - "grad_norm": 1.9490152597427368, - "learning_rate": 1.7515166774379947e-06, - "loss": 1.0518, - "step": 3215 - }, - { - "epoch": 2.4340586565752127, - "grad_norm": 2.046515703201294, - "learning_rate": 1.746982925035591e-06, - "loss": 1.0466, - "step": 3216 - }, - { - "epoch": 2.4348155156102176, - "grad_norm": 2.0436580181121826, - "learning_rate": 1.7424544930467205e-06, - "loss": 1.0642, - "step": 3217 - }, - { - "epoch": 2.4355723746452225, - "grad_norm": 2.1087872982025146, - "learning_rate": 1.7379313843903357e-06, - "loss": 1.0838, - "step": 3218 - }, - { - "epoch": 2.436329233680227, - "grad_norm": 1.896474838256836, - "learning_rate": 1.7334136019819681e-06, - "loss": 1.0678, - "step": 3219 - }, - { - "epoch": 2.437086092715232, - "grad_norm": 2.087778091430664, - "learning_rate": 1.7289011487337059e-06, - "loss": 1.0402, - "step": 3220 - }, - { - "epoch": 2.4378429517502367, - "grad_norm": 1.9922432899475098, - "learning_rate": 1.7243940275542126e-06, - "loss": 1.119, - "step": 3221 - }, - { - "epoch": 2.438599810785241, - "grad_norm": 2.1278886795043945, - "learning_rate": 1.7198922413487073e-06, - "loss": 1.1303, - "step": 3222 - }, - { - "epoch": 2.439356669820246, - "grad_norm": 2.059356689453125, - "learning_rate": 1.7153957930189735e-06, - "loss": 1.0732, - "step": 3223 - }, - { - "epoch": 2.440113528855251, - "grad_norm": 2.127638578414917, - "learning_rate": 1.7109046854633587e-06, - "loss": 1.0715, - "step": 3224 - }, - { - "epoch": 2.4408703878902553, - "grad_norm": 2.0281498432159424, - "learning_rate": 1.7064189215767526e-06, - "loss": 1.0378, - "step": 3225 - }, - { - "epoch": 2.4416272469252602, - "grad_norm": 2.8992788791656494, - "learning_rate": 1.7019385042506134e-06, - "loss": 1.0982, - "step": 3226 - }, - { - "epoch": 2.442384105960265, - "grad_norm": 2.046388626098633, - "learning_rate": 1.697463436372951e-06, - "loss": 1.0689, - "step": 3227 - }, - { - "epoch": 2.4431409649952696, - "grad_norm": 1.9682432413101196, - 
"learning_rate": 1.692993720828327e-06, - "loss": 1.0886, - "step": 3228 - }, - { - "epoch": 2.4438978240302744, - "grad_norm": 2.190717935562134, - "learning_rate": 1.6885293604978495e-06, - "loss": 1.0941, - "step": 3229 - }, - { - "epoch": 2.444654683065279, - "grad_norm": 2.263735294342041, - "learning_rate": 1.6840703582591808e-06, - "loss": 1.1485, - "step": 3230 - }, - { - "epoch": 2.4454115421002838, - "grad_norm": 1.917043924331665, - "learning_rate": 1.6796167169865243e-06, - "loss": 1.044, - "step": 3231 - }, - { - "epoch": 2.4461684011352887, - "grad_norm": 2.120823621749878, - "learning_rate": 1.6751684395506248e-06, - "loss": 1.0492, - "step": 3232 - }, - { - "epoch": 2.446925260170293, - "grad_norm": 1.9636114835739136, - "learning_rate": 1.6707255288187776e-06, - "loss": 1.0525, - "step": 3233 - }, - { - "epoch": 2.447682119205298, - "grad_norm": 2.068773031234741, - "learning_rate": 1.6662879876548164e-06, - "loss": 1.1185, - "step": 3234 - }, - { - "epoch": 2.448438978240303, - "grad_norm": 1.9608315229415894, - "learning_rate": 1.661855818919112e-06, - "loss": 1.0705, - "step": 3235 - }, - { - "epoch": 2.4491958372753073, - "grad_norm": 2.037750005722046, - "learning_rate": 1.65742902546857e-06, - "loss": 1.1109, - "step": 3236 - }, - { - "epoch": 2.449952696310312, - "grad_norm": 2.2666871547698975, - "learning_rate": 1.653007610156637e-06, - "loss": 1.0955, - "step": 3237 - }, - { - "epoch": 2.450709555345317, - "grad_norm": 2.0493760108947754, - "learning_rate": 1.6485915758332899e-06, - "loss": 1.0354, - "step": 3238 - }, - { - "epoch": 2.4514664143803215, - "grad_norm": 2.0443224906921387, - "learning_rate": 1.6441809253450347e-06, - "loss": 1.0605, - "step": 3239 - }, - { - "epoch": 2.4522232734153264, - "grad_norm": 1.7710448503494263, - "learning_rate": 1.6397756615349103e-06, - "loss": 1.064, - "step": 3240 - }, - { - "epoch": 2.4529801324503313, - "grad_norm": 2.0574166774749756, - "learning_rate": 1.6353757872424848e-06, - "loss": 1.0986, - "step": 3241 - }, - { - "epoch": 2.4537369914853357, - "grad_norm": 2.0350000858306885, - "learning_rate": 1.6309813053038476e-06, - "loss": 1.0722, - "step": 3242 - }, - { - "epoch": 2.4544938505203406, - "grad_norm": 2.178621530532837, - "learning_rate": 1.6265922185516136e-06, - "loss": 1.0777, - "step": 3243 - }, - { - "epoch": 2.4552507095553455, - "grad_norm": 1.874701976776123, - "learning_rate": 1.6222085298149237e-06, - "loss": 1.0678, - "step": 3244 - }, - { - "epoch": 2.45600756859035, - "grad_norm": 2.080073356628418, - "learning_rate": 1.617830241919439e-06, - "loss": 1.1109, - "step": 3245 - }, - { - "epoch": 2.456764427625355, - "grad_norm": 2.067389488220215, - "learning_rate": 1.6134573576873347e-06, - "loss": 1.0613, - "step": 3246 - }, - { - "epoch": 2.4575212866603593, - "grad_norm": 2.2184066772460938, - "learning_rate": 1.6090898799373013e-06, - "loss": 1.0445, - "step": 3247 - }, - { - "epoch": 2.458278145695364, - "grad_norm": 2.1395821571350098, - "learning_rate": 1.6047278114845524e-06, - "loss": 1.1291, - "step": 3248 - }, - { - "epoch": 2.459035004730369, - "grad_norm": 1.878059983253479, - "learning_rate": 1.6003711551408108e-06, - "loss": 1.1197, - "step": 3249 - }, - { - "epoch": 2.4597918637653735, - "grad_norm": 2.079202651977539, - "learning_rate": 1.5960199137143096e-06, - "loss": 1.0601, - "step": 3250 - }, - { - "epoch": 2.4605487228003784, - "grad_norm": 2.1114516258239746, - "learning_rate": 1.5916740900097936e-06, - "loss": 1.0981, - "step": 3251 - }, - { - "epoch": 
2.4613055818353833, - "grad_norm": 2.0840392112731934, - "learning_rate": 1.5873336868285188e-06, - "loss": 1.0901, - "step": 3252 - }, - { - "epoch": 2.4620624408703877, - "grad_norm": 2.1868133544921875, - "learning_rate": 1.582998706968233e-06, - "loss": 1.1103, - "step": 3253 - }, - { - "epoch": 2.4628192999053926, - "grad_norm": 2.0153892040252686, - "learning_rate": 1.5786691532232047e-06, - "loss": 1.0825, - "step": 3254 - }, - { - "epoch": 2.4635761589403975, - "grad_norm": 2.147407054901123, - "learning_rate": 1.5743450283841957e-06, - "loss": 1.0529, - "step": 3255 - }, - { - "epoch": 2.464333017975402, - "grad_norm": 2.2476887702941895, - "learning_rate": 1.5700263352384732e-06, - "loss": 1.0551, - "step": 3256 - }, - { - "epoch": 2.465089877010407, - "grad_norm": 1.897383689880371, - "learning_rate": 1.5657130765698006e-06, - "loss": 1.0773, - "step": 3257 - }, - { - "epoch": 2.4658467360454117, - "grad_norm": 2.2343618869781494, - "learning_rate": 1.56140525515844e-06, - "loss": 1.0388, - "step": 3258 - }, - { - "epoch": 2.466603595080416, - "grad_norm": 2.182474136352539, - "learning_rate": 1.5571028737811414e-06, - "loss": 1.0837, - "step": 3259 - }, - { - "epoch": 2.467360454115421, - "grad_norm": 1.94349205493927, - "learning_rate": 1.5528059352111586e-06, - "loss": 1.0374, - "step": 3260 - }, - { - "epoch": 2.468117313150426, - "grad_norm": 2.3165524005889893, - "learning_rate": 1.5485144422182325e-06, - "loss": 1.049, - "step": 3261 - }, - { - "epoch": 2.4688741721854304, - "grad_norm": 2.2094292640686035, - "learning_rate": 1.5442283975685937e-06, - "loss": 1.0934, - "step": 3262 - }, - { - "epoch": 2.4696310312204353, - "grad_norm": 2.0244195461273193, - "learning_rate": 1.5399478040249638e-06, - "loss": 1.044, - "step": 3263 - }, - { - "epoch": 2.4703878902554397, - "grad_norm": 1.9300179481506348, - "learning_rate": 1.5356726643465427e-06, - "loss": 1.1156, - "step": 3264 - }, - { - "epoch": 2.4711447492904446, - "grad_norm": 2.06846022605896, - "learning_rate": 1.5314029812890258e-06, - "loss": 1.012, - "step": 3265 - }, - { - "epoch": 2.4719016083254495, - "grad_norm": 2.2604005336761475, - "learning_rate": 1.5271387576045804e-06, - "loss": 1.1042, - "step": 3266 - }, - { - "epoch": 2.4726584673604544, - "grad_norm": 2.3489127159118652, - "learning_rate": 1.5228799960418639e-06, - "loss": 1.05, - "step": 3267 - }, - { - "epoch": 2.473415326395459, - "grad_norm": 2.0200610160827637, - "learning_rate": 1.518626699346009e-06, - "loss": 1.1298, - "step": 3268 - }, - { - "epoch": 2.4741721854304637, - "grad_norm": 2.0748353004455566, - "learning_rate": 1.514378870258623e-06, - "loss": 1.0477, - "step": 3269 - }, - { - "epoch": 2.474929044465468, - "grad_norm": 2.0303914546966553, - "learning_rate": 1.510136511517792e-06, - "loss": 1.0319, - "step": 3270 - }, - { - "epoch": 2.475685903500473, - "grad_norm": 1.9617363214492798, - "learning_rate": 1.5058996258580788e-06, - "loss": 1.1149, - "step": 3271 - }, - { - "epoch": 2.476442762535478, - "grad_norm": 2.0544240474700928, - "learning_rate": 1.5016682160105153e-06, - "loss": 1.0733, - "step": 3272 - }, - { - "epoch": 2.4771996215704823, - "grad_norm": 2.3402349948883057, - "learning_rate": 1.4974422847026002e-06, - "loss": 1.0615, - "step": 3273 - }, - { - "epoch": 2.4779564806054872, - "grad_norm": 2.0029454231262207, - "learning_rate": 1.4932218346583082e-06, - "loss": 1.0688, - "step": 3274 - }, - { - "epoch": 2.478713339640492, - "grad_norm": 1.8487077951431274, - "learning_rate": 1.4890068685980732e-06, - "loss": 
1.1071, - "step": 3275 - }, - { - "epoch": 2.4794701986754966, - "grad_norm": 2.282620906829834, - "learning_rate": 1.4847973892388003e-06, - "loss": 1.0802, - "step": 3276 - }, - { - "epoch": 2.4802270577105014, - "grad_norm": 1.9295916557312012, - "learning_rate": 1.4805933992938547e-06, - "loss": 1.0663, - "step": 3277 - }, - { - "epoch": 2.4809839167455063, - "grad_norm": 2.0331246852874756, - "learning_rate": 1.476394901473066e-06, - "loss": 1.0906, - "step": 3278 - }, - { - "epoch": 2.4817407757805108, - "grad_norm": 2.1244752407073975, - "learning_rate": 1.4722018984827247e-06, - "loss": 1.0655, - "step": 3279 - }, - { - "epoch": 2.4824976348155157, - "grad_norm": 2.140397787094116, - "learning_rate": 1.4680143930255675e-06, - "loss": 1.0969, - "step": 3280 - }, - { - "epoch": 2.48325449385052, - "grad_norm": 2.1430792808532715, - "learning_rate": 1.4638323878008022e-06, - "loss": 1.06, - "step": 3281 - }, - { - "epoch": 2.484011352885525, - "grad_norm": 2.28765606880188, - "learning_rate": 1.459655885504086e-06, - "loss": 1.1159, - "step": 3282 - }, - { - "epoch": 2.48476821192053, - "grad_norm": 1.9069238901138306, - "learning_rate": 1.455484888827526e-06, - "loss": 1.0083, - "step": 3283 - }, - { - "epoch": 2.4855250709555348, - "grad_norm": 1.848893642425537, - "learning_rate": 1.4513194004596865e-06, - "loss": 1.0527, - "step": 3284 - }, - { - "epoch": 2.486281929990539, - "grad_norm": 1.8594064712524414, - "learning_rate": 1.4471594230855774e-06, - "loss": 1.0815, - "step": 3285 - }, - { - "epoch": 2.487038789025544, - "grad_norm": 1.9376791715621948, - "learning_rate": 1.4430049593866543e-06, - "loss": 1.0403, - "step": 3286 - }, - { - "epoch": 2.4877956480605485, - "grad_norm": 2.031545639038086, - "learning_rate": 1.4388560120408215e-06, - "loss": 1.0378, - "step": 3287 - }, - { - "epoch": 2.4885525070955534, - "grad_norm": 2.0290255546569824, - "learning_rate": 1.4347125837224266e-06, - "loss": 1.05, - "step": 3288 - }, - { - "epoch": 2.4893093661305583, - "grad_norm": 2.070533037185669, - "learning_rate": 1.4305746771022623e-06, - "loss": 1.0854, - "step": 3289 - }, - { - "epoch": 2.4900662251655628, - "grad_norm": 3.2161612510681152, - "learning_rate": 1.4264422948475618e-06, - "loss": 1.0484, - "step": 3290 - }, - { - "epoch": 2.4908230842005676, - "grad_norm": 2.044058322906494, - "learning_rate": 1.4223154396219906e-06, - "loss": 1.0543, - "step": 3291 - }, - { - "epoch": 2.4915799432355725, - "grad_norm": 1.9972931146621704, - "learning_rate": 1.4181941140856595e-06, - "loss": 1.0482, - "step": 3292 - }, - { - "epoch": 2.492336802270577, - "grad_norm": 2.115438222885132, - "learning_rate": 1.4140783208951142e-06, - "loss": 1.1006, - "step": 3293 - }, - { - "epoch": 2.493093661305582, - "grad_norm": 1.9351952075958252, - "learning_rate": 1.4099680627033266e-06, - "loss": 1.0229, - "step": 3294 - }, - { - "epoch": 2.4938505203405867, - "grad_norm": 2.0593087673187256, - "learning_rate": 1.4058633421597104e-06, - "loss": 1.0653, - "step": 3295 - }, - { - "epoch": 2.494607379375591, - "grad_norm": 2.1452414989471436, - "learning_rate": 1.4017641619101074e-06, - "loss": 1.1039, - "step": 3296 - }, - { - "epoch": 2.495364238410596, - "grad_norm": 1.799978494644165, - "learning_rate": 1.3976705245967832e-06, - "loss": 1.0778, - "step": 3297 - }, - { - "epoch": 2.4961210974456005, - "grad_norm": 1.9537346363067627, - "learning_rate": 1.3935824328584335e-06, - "loss": 1.0688, - "step": 3298 - }, - { - "epoch": 2.4968779564806054, - "grad_norm": 2.1485798358917236, - 
"learning_rate": 1.3894998893301829e-06, - "loss": 1.0416, - "step": 3299 - }, - { - "epoch": 2.4976348155156103, - "grad_norm": 2.167556047439575, - "learning_rate": 1.3854228966435768e-06, - "loss": 1.0693, - "step": 3300 - }, - { - "epoch": 2.498391674550615, - "grad_norm": 2.1335930824279785, - "learning_rate": 1.3813514574265815e-06, - "loss": 1.0789, - "step": 3301 - }, - { - "epoch": 2.4991485335856196, - "grad_norm": 1.9826858043670654, - "learning_rate": 1.3772855743035818e-06, - "loss": 1.071, - "step": 3302 - }, - { - "epoch": 2.4999053926206245, - "grad_norm": 1.9714877605438232, - "learning_rate": 1.3732252498953874e-06, - "loss": 1.0915, - "step": 3303 - }, - { - "epoch": 2.500662251655629, - "grad_norm": 1.859167218208313, - "learning_rate": 1.3691704868192202e-06, - "loss": 1.0345, - "step": 3304 - }, - { - "epoch": 2.501419110690634, - "grad_norm": 1.9658515453338623, - "learning_rate": 1.3651212876887181e-06, - "loss": 1.0506, - "step": 3305 - }, - { - "epoch": 2.5021759697256387, - "grad_norm": 2.1254079341888428, - "learning_rate": 1.361077655113935e-06, - "loss": 1.0721, - "step": 3306 - }, - { - "epoch": 2.5029328287606436, - "grad_norm": 2.0176279544830322, - "learning_rate": 1.3570395917013365e-06, - "loss": 1.0711, - "step": 3307 - }, - { - "epoch": 2.503689687795648, - "grad_norm": 2.091989278793335, - "learning_rate": 1.353007100053791e-06, - "loss": 1.1103, - "step": 3308 - }, - { - "epoch": 2.504446546830653, - "grad_norm": 2.0587007999420166, - "learning_rate": 1.348980182770584e-06, - "loss": 1.0569, - "step": 3309 - }, - { - "epoch": 2.5052034058656574, - "grad_norm": 1.9162755012512207, - "learning_rate": 1.344958842447405e-06, - "loss": 1.0914, - "step": 3310 - }, - { - "epoch": 2.5059602649006623, - "grad_norm": 1.995250940322876, - "learning_rate": 1.3409430816763478e-06, - "loss": 1.0789, - "step": 3311 - }, - { - "epoch": 2.506717123935667, - "grad_norm": 2.189922571182251, - "learning_rate": 1.3369329030459152e-06, - "loss": 1.1112, - "step": 3312 - }, - { - "epoch": 2.5074739829706716, - "grad_norm": 2.006410837173462, - "learning_rate": 1.3329283091410014e-06, - "loss": 1.0911, - "step": 3313 - }, - { - "epoch": 2.5082308420056765, - "grad_norm": 1.9690699577331543, - "learning_rate": 1.3289293025429082e-06, - "loss": 1.0642, - "step": 3314 - }, - { - "epoch": 2.508987701040681, - "grad_norm": 1.9968689680099487, - "learning_rate": 1.324935885829334e-06, - "loss": 1.1037, - "step": 3315 - }, - { - "epoch": 2.509744560075686, - "grad_norm": 2.0873682498931885, - "learning_rate": 1.3209480615743746e-06, - "loss": 1.0804, - "step": 3316 - }, - { - "epoch": 2.5105014191106907, - "grad_norm": 2.100817918777466, - "learning_rate": 1.3169658323485212e-06, - "loss": 1.0313, - "step": 3317 - }, - { - "epoch": 2.5112582781456956, - "grad_norm": 2.1082022190093994, - "learning_rate": 1.3129892007186602e-06, - "loss": 1.0815, - "step": 3318 - }, - { - "epoch": 2.5120151371807, - "grad_norm": 2.102774143218994, - "learning_rate": 1.3090181692480642e-06, - "loss": 1.0529, - "step": 3319 - }, - { - "epoch": 2.512771996215705, - "grad_norm": 1.8931456804275513, - "learning_rate": 1.305052740496402e-06, - "loss": 1.029, - "step": 3320 - }, - { - "epoch": 2.5135288552507093, - "grad_norm": 1.9600942134857178, - "learning_rate": 1.301092917019724e-06, - "loss": 1.0499, - "step": 3321 - }, - { - "epoch": 2.5142857142857142, - "grad_norm": 1.9435330629348755, - "learning_rate": 1.2971387013704767e-06, - "loss": 1.0662, - "step": 3322 - }, - { - "epoch": 
2.515042573320719, - "grad_norm": 2.6212613582611084, - "learning_rate": 1.2931900960974872e-06, - "loss": 1.0569, - "step": 3323 - }, - { - "epoch": 2.515799432355724, - "grad_norm": 2.1485402584075928, - "learning_rate": 1.2892471037459634e-06, - "loss": 1.0798, - "step": 3324 - }, - { - "epoch": 2.5165562913907285, - "grad_norm": 1.9852415323257446, - "learning_rate": 1.285309726857499e-06, - "loss": 1.0856, - "step": 3325 - }, - { - "epoch": 2.5173131504257333, - "grad_norm": 2.0448715686798096, - "learning_rate": 1.281377967970067e-06, - "loss": 1.0894, - "step": 3326 - }, - { - "epoch": 2.518070009460738, - "grad_norm": 1.966200590133667, - "learning_rate": 1.2774518296180222e-06, - "loss": 1.0583, - "step": 3327 - }, - { - "epoch": 2.5188268684957427, - "grad_norm": 2.0975255966186523, - "learning_rate": 1.2735313143320901e-06, - "loss": 1.1087, - "step": 3328 - }, - { - "epoch": 2.5195837275307476, - "grad_norm": 1.8325495719909668, - "learning_rate": 1.2696164246393766e-06, - "loss": 1.0857, - "step": 3329 - }, - { - "epoch": 2.520340586565752, - "grad_norm": 2.3163001537323, - "learning_rate": 1.265707163063358e-06, - "loss": 1.0535, - "step": 3330 - }, - { - "epoch": 2.521097445600757, - "grad_norm": 2.0799732208251953, - "learning_rate": 1.2618035321238856e-06, - "loss": 1.1036, - "step": 3331 - }, - { - "epoch": 2.5218543046357613, - "grad_norm": 1.9881048202514648, - "learning_rate": 1.257905534337181e-06, - "loss": 1.0693, - "step": 3332 - }, - { - "epoch": 2.522611163670766, - "grad_norm": 2.5260820388793945, - "learning_rate": 1.2540131722158336e-06, - "loss": 1.0339, - "step": 3333 - }, - { - "epoch": 2.523368022705771, - "grad_norm": 2.0032739639282227, - "learning_rate": 1.2501264482688052e-06, - "loss": 1.0888, - "step": 3334 - }, - { - "epoch": 2.524124881740776, - "grad_norm": 1.9609510898590088, - "learning_rate": 1.2462453650014107e-06, - "loss": 1.0654, - "step": 3335 - }, - { - "epoch": 2.5248817407757804, - "grad_norm": 2.018681049346924, - "learning_rate": 1.2423699249153408e-06, - "loss": 1.0763, - "step": 3336 - }, - { - "epoch": 2.5256385998107853, - "grad_norm": 1.9069342613220215, - "learning_rate": 1.2385001305086455e-06, - "loss": 1.0883, - "step": 3337 - }, - { - "epoch": 2.5263954588457898, - "grad_norm": 2.1848833560943604, - "learning_rate": 1.2346359842757345e-06, - "loss": 1.0894, - "step": 3338 - }, - { - "epoch": 2.5271523178807946, - "grad_norm": 2.210022449493408, - "learning_rate": 1.230777488707379e-06, - "loss": 1.0807, - "step": 3339 - }, - { - "epoch": 2.5279091769157995, - "grad_norm": 2.1791112422943115, - "learning_rate": 1.2269246462907065e-06, - "loss": 1.0707, - "step": 3340 - }, - { - "epoch": 2.5286660359508044, - "grad_norm": 1.9715594053268433, - "learning_rate": 1.2230774595092005e-06, - "loss": 1.0304, - "step": 3341 - }, - { - "epoch": 2.529422894985809, - "grad_norm": 2.082552433013916, - "learning_rate": 1.219235930842696e-06, - "loss": 1.088, - "step": 3342 - }, - { - "epoch": 2.5301797540208137, - "grad_norm": 2.0022144317626953, - "learning_rate": 1.215400062767385e-06, - "loss": 1.0153, - "step": 3343 - }, - { - "epoch": 2.530936613055818, - "grad_norm": 2.0545942783355713, - "learning_rate": 1.2115698577558096e-06, - "loss": 1.0895, - "step": 3344 - }, - { - "epoch": 2.531693472090823, - "grad_norm": 1.8354177474975586, - "learning_rate": 1.207745318276865e-06, - "loss": 1.082, - "step": 3345 - }, - { - "epoch": 2.532450331125828, - "grad_norm": 2.023404121398926, - "learning_rate": 1.203926446795787e-06, - "loss": 
1.0675, - "step": 3346 - }, - { - "epoch": 2.5332071901608324, - "grad_norm": 1.8171032667160034, - "learning_rate": 1.2001132457741615e-06, - "loss": 1.0464, - "step": 3347 - }, - { - "epoch": 2.5339640491958373, - "grad_norm": 1.9232815504074097, - "learning_rate": 1.1963057176699249e-06, - "loss": 1.0647, - "step": 3348 - }, - { - "epoch": 2.5347209082308417, - "grad_norm": 1.9270484447479248, - "learning_rate": 1.1925038649373456e-06, - "loss": 1.1044, - "step": 3349 - }, - { - "epoch": 2.5354777672658466, - "grad_norm": 2.1430654525756836, - "learning_rate": 1.1887076900270418e-06, - "loss": 1.0809, - "step": 3350 - }, - { - "epoch": 2.5362346263008515, - "grad_norm": 2.0014898777008057, - "learning_rate": 1.1849171953859737e-06, - "loss": 1.0902, - "step": 3351 - }, - { - "epoch": 2.5369914853358564, - "grad_norm": 2.0257580280303955, - "learning_rate": 1.1811323834574302e-06, - "loss": 1.0911, - "step": 3352 - }, - { - "epoch": 2.537748344370861, - "grad_norm": 1.9925347566604614, - "learning_rate": 1.1773532566810477e-06, - "loss": 1.0666, - "step": 3353 - }, - { - "epoch": 2.5385052034058657, - "grad_norm": 2.072805404663086, - "learning_rate": 1.1735798174927917e-06, - "loss": 1.1, - "step": 3354 - }, - { - "epoch": 2.53926206244087, - "grad_norm": 2.0256335735321045, - "learning_rate": 1.1698120683249663e-06, - "loss": 1.0585, - "step": 3355 - }, - { - "epoch": 2.540018921475875, - "grad_norm": 2.217489004135132, - "learning_rate": 1.1660500116062037e-06, - "loss": 1.0761, - "step": 3356 - }, - { - "epoch": 2.54077578051088, - "grad_norm": 1.91505765914917, - "learning_rate": 1.1622936497614644e-06, - "loss": 1.0579, - "step": 3357 - }, - { - "epoch": 2.541532639545885, - "grad_norm": 1.9349195957183838, - "learning_rate": 1.1585429852120462e-06, - "loss": 1.074, - "step": 3358 - }, - { - "epoch": 2.5422894985808893, - "grad_norm": 1.915028691291809, - "learning_rate": 1.1547980203755697e-06, - "loss": 1.0717, - "step": 3359 - }, - { - "epoch": 2.543046357615894, - "grad_norm": 2.154155731201172, - "learning_rate": 1.1510587576659814e-06, - "loss": 1.057, - "step": 3360 - }, - { - "epoch": 2.5438032166508986, - "grad_norm": 2.6311988830566406, - "learning_rate": 1.1473251994935532e-06, - "loss": 1.1184, - "step": 3361 - }, - { - "epoch": 2.5445600756859035, - "grad_norm": 2.038876533508301, - "learning_rate": 1.1435973482648844e-06, - "loss": 1.0693, - "step": 3362 - }, - { - "epoch": 2.5453169347209084, - "grad_norm": 2.0545339584350586, - "learning_rate": 1.1398752063828815e-06, - "loss": 1.0887, - "step": 3363 - }, - { - "epoch": 2.546073793755913, - "grad_norm": 1.8213523626327515, - "learning_rate": 1.1361587762467873e-06, - "loss": 1.0866, - "step": 3364 - }, - { - "epoch": 2.5468306527909177, - "grad_norm": 2.055341958999634, - "learning_rate": 1.1324480602521524e-06, - "loss": 1.0923, - "step": 3365 - }, - { - "epoch": 2.5475875118259226, - "grad_norm": 2.2624013423919678, - "learning_rate": 1.1287430607908508e-06, - "loss": 1.0532, - "step": 3366 - }, - { - "epoch": 2.548344370860927, - "grad_norm": 2.7478582859039307, - "learning_rate": 1.1250437802510686e-06, - "loss": 1.0849, - "step": 3367 - }, - { - "epoch": 2.549101229895932, - "grad_norm": 1.8595587015151978, - "learning_rate": 1.1213502210173044e-06, - "loss": 1.0518, - "step": 3368 - }, - { - "epoch": 2.549858088930937, - "grad_norm": 2.0965359210968018, - "learning_rate": 1.1176623854703688e-06, - "loss": 1.0152, - "step": 3369 - }, - { - "epoch": 2.5506149479659412, - "grad_norm": 2.1459243297576904, - 
"learning_rate": 1.1139802759873852e-06, - "loss": 1.1309, - "step": 3370 - }, - { - "epoch": 2.551371807000946, - "grad_norm": 1.981099247932434, - "learning_rate": 1.110303894941786e-06, - "loss": 1.1094, - "step": 3371 - }, - { - "epoch": 2.5521286660359506, - "grad_norm": 2.0165510177612305, - "learning_rate": 1.10663324470331e-06, - "loss": 1.0812, - "step": 3372 - }, - { - "epoch": 2.5528855250709555, - "grad_norm": 1.839483380317688, - "learning_rate": 1.102968327638005e-06, - "loss": 1.0098, - "step": 3373 - }, - { - "epoch": 2.5536423841059603, - "grad_norm": 2.0099551677703857, - "learning_rate": 1.0993091461082154e-06, - "loss": 1.0111, - "step": 3374 - }, - { - "epoch": 2.5543992431409652, - "grad_norm": 2.1071789264678955, - "learning_rate": 1.0956557024725986e-06, - "loss": 1.018, - "step": 3375 - }, - { - "epoch": 2.5551561021759697, - "grad_norm": 2.2769486904144287, - "learning_rate": 1.0920079990861043e-06, - "loss": 1.0538, - "step": 3376 - }, - { - "epoch": 2.5559129612109746, - "grad_norm": 1.969045877456665, - "learning_rate": 1.088366038299989e-06, - "loss": 1.0186, - "step": 3377 - }, - { - "epoch": 2.556669820245979, - "grad_norm": 2.0804572105407715, - "learning_rate": 1.0847298224618053e-06, - "loss": 1.0581, - "step": 3378 - }, - { - "epoch": 2.557426679280984, - "grad_norm": 1.9232372045516968, - "learning_rate": 1.081099353915403e-06, - "loss": 1.0507, - "step": 3379 - }, - { - "epoch": 2.5581835383159888, - "grad_norm": 1.9603554010391235, - "learning_rate": 1.077474635000925e-06, - "loss": 1.0878, - "step": 3380 - }, - { - "epoch": 2.558940397350993, - "grad_norm": 2.1331593990325928, - "learning_rate": 1.07385566805481e-06, - "loss": 1.0751, - "step": 3381 - }, - { - "epoch": 2.559697256385998, - "grad_norm": 2.48708438873291, - "learning_rate": 1.070242455409791e-06, - "loss": 1.0929, - "step": 3382 - }, - { - "epoch": 2.560454115421003, - "grad_norm": 2.023110866546631, - "learning_rate": 1.066634999394886e-06, - "loss": 1.0477, - "step": 3383 - }, - { - "epoch": 2.5612109744560074, - "grad_norm": 2.201087236404419, - "learning_rate": 1.0630333023354118e-06, - "loss": 1.0736, - "step": 3384 - }, - { - "epoch": 2.5619678334910123, - "grad_norm": 2.0009500980377197, - "learning_rate": 1.0594373665529613e-06, - "loss": 1.0059, - "step": 3385 - }, - { - "epoch": 2.562724692526017, - "grad_norm": 1.9756640195846558, - "learning_rate": 1.0558471943654217e-06, - "loss": 1.0857, - "step": 3386 - }, - { - "epoch": 2.5634815515610216, - "grad_norm": 2.2636256217956543, - "learning_rate": 1.0522627880869646e-06, - "loss": 1.0848, - "step": 3387 - }, - { - "epoch": 2.5642384105960265, - "grad_norm": 2.0621442794799805, - "learning_rate": 1.0486841500280441e-06, - "loss": 1.0821, - "step": 3388 - }, - { - "epoch": 2.564995269631031, - "grad_norm": 2.0804009437561035, - "learning_rate": 1.0451112824953961e-06, - "loss": 1.072, - "step": 3389 - }, - { - "epoch": 2.565752128666036, - "grad_norm": 2.1010797023773193, - "learning_rate": 1.0415441877920349e-06, - "loss": 1.0384, - "step": 3390 - }, - { - "epoch": 2.5665089877010407, - "grad_norm": 2.0294950008392334, - "learning_rate": 1.037982868217254e-06, - "loss": 1.1007, - "step": 3391 - }, - { - "epoch": 2.5672658467360456, - "grad_norm": 2.2194080352783203, - "learning_rate": 1.0344273260666264e-06, - "loss": 1.0293, - "step": 3392 - }, - { - "epoch": 2.56802270577105, - "grad_norm": 2.0473580360412598, - "learning_rate": 1.0308775636320018e-06, - "loss": 1.0989, - "step": 3393 - }, - { - "epoch": 2.568779564806055, 
- "grad_norm": 2.1180901527404785, - "learning_rate": 1.027333583201503e-06, - "loss": 1.073, - "step": 3394 - }, - { - "epoch": 2.5695364238410594, - "grad_norm": 1.9611269235610962, - "learning_rate": 1.0237953870595262e-06, - "loss": 1.0638, - "step": 3395 - }, - { - "epoch": 2.5702932828760643, - "grad_norm": 2.031759023666382, - "learning_rate": 1.0202629774867378e-06, - "loss": 1.044, - "step": 3396 - }, - { - "epoch": 2.571050141911069, - "grad_norm": 2.155648708343506, - "learning_rate": 1.016736356760073e-06, - "loss": 1.0815, - "step": 3397 - }, - { - "epoch": 2.571807000946074, - "grad_norm": 2.0659499168395996, - "learning_rate": 1.0132155271527401e-06, - "loss": 1.0977, - "step": 3398 - }, - { - "epoch": 2.5725638599810785, - "grad_norm": 2.2170495986938477, - "learning_rate": 1.0097004909342112e-06, - "loss": 1.0449, - "step": 3399 - }, - { - "epoch": 2.5733207190160834, - "grad_norm": 1.9732736349105835, - "learning_rate": 1.0061912503702258e-06, - "loss": 1.0475, - "step": 3400 - }, - { - "epoch": 2.574077578051088, - "grad_norm": 1.9781739711761475, - "learning_rate": 1.0026878077227885e-06, - "loss": 1.0778, - "step": 3401 - }, - { - "epoch": 2.5748344370860927, - "grad_norm": 2.0298542976379395, - "learning_rate": 9.99190165250161e-07, - "loss": 1.0379, - "step": 3402 - }, - { - "epoch": 2.5755912961210976, - "grad_norm": 1.7894214391708374, - "learning_rate": 9.95698325206874e-07, - "loss": 1.069, - "step": 3403 - }, - { - "epoch": 2.576348155156102, - "grad_norm": 2.022477865219116, - "learning_rate": 9.922122898437122e-07, - "loss": 1.0623, - "step": 3404 - }, - { - "epoch": 2.577105014191107, - "grad_norm": 1.8968234062194824, - "learning_rate": 9.887320614077198e-07, - "loss": 1.04, - "step": 3405 - }, - { - "epoch": 2.5778618732261114, - "grad_norm": 2.217832326889038, - "learning_rate": 9.852576421422033e-07, - "loss": 1.0943, - "step": 3406 - }, - { - "epoch": 2.5786187322611163, - "grad_norm": 2.0771234035491943, - "learning_rate": 9.817890342867157e-07, - "loss": 1.0767, - "step": 3407 - }, - { - "epoch": 2.579375591296121, - "grad_norm": 1.9438964128494263, - "learning_rate": 9.783262400770708e-07, - "loss": 1.1243, - "step": 3408 - }, - { - "epoch": 2.580132450331126, - "grad_norm": 2.441040277481079, - "learning_rate": 9.748692617453326e-07, - "loss": 1.0624, - "step": 3409 - }, - { - "epoch": 2.5808893093661305, - "grad_norm": 2.0702598094940186, - "learning_rate": 9.714181015198182e-07, - "loss": 1.0696, - "step": 3410 - }, - { - "epoch": 2.5816461684011354, - "grad_norm": 2.0918853282928467, - "learning_rate": 9.67972761625091e-07, - "loss": 1.0814, - "step": 3411 - }, - { - "epoch": 2.58240302743614, - "grad_norm": 1.9000964164733887, - "learning_rate": 9.645332442819653e-07, - "loss": 1.0554, - "step": 3412 - }, - { - "epoch": 2.5831598864711447, - "grad_norm": 2.1118955612182617, - "learning_rate": 9.610995517075005e-07, - "loss": 1.0622, - "step": 3413 - }, - { - "epoch": 2.5839167455061496, - "grad_norm": 2.136005163192749, - "learning_rate": 9.57671686115003e-07, - "loss": 1.0871, - "step": 3414 - }, - { - "epoch": 2.5846736045411545, - "grad_norm": 2.0861973762512207, - "learning_rate": 9.542496497140228e-07, - "loss": 1.0348, - "step": 3415 - }, - { - "epoch": 2.585430463576159, - "grad_norm": 1.9754106998443604, - "learning_rate": 9.50833444710354e-07, - "loss": 1.0797, - "step": 3416 - }, - { - "epoch": 2.586187322611164, - "grad_norm": 2.138561964035034, - "learning_rate": 9.474230733060293e-07, - "loss": 1.1018, - "step": 3417 - }, - { - 
"epoch": 2.5869441816461682, - "grad_norm": 2.1578221321105957, - "learning_rate": 9.440185376993193e-07, - "loss": 1.1082, - "step": 3418 - }, - { - "epoch": 2.587701040681173, - "grad_norm": 1.928044080734253, - "learning_rate": 9.406198400847376e-07, - "loss": 1.0723, - "step": 3419 - }, - { - "epoch": 2.588457899716178, - "grad_norm": 2.0299084186553955, - "learning_rate": 9.372269826530338e-07, - "loss": 1.0557, - "step": 3420 - }, - { - "epoch": 2.5892147587511825, - "grad_norm": 2.100691556930542, - "learning_rate": 9.338399675911917e-07, - "loss": 1.0221, - "step": 3421 - }, - { - "epoch": 2.5899716177861873, - "grad_norm": 2.015913724899292, - "learning_rate": 9.304587970824288e-07, - "loss": 1.0651, - "step": 3422 - }, - { - "epoch": 2.590728476821192, - "grad_norm": 1.8734519481658936, - "learning_rate": 9.270834733061999e-07, - "loss": 1.0554, - "step": 3423 - }, - { - "epoch": 2.5914853358561967, - "grad_norm": 1.9088720083236694, - "learning_rate": 9.237139984381806e-07, - "loss": 1.0847, - "step": 3424 - }, - { - "epoch": 2.5922421948912016, - "grad_norm": 2.083169460296631, - "learning_rate": 9.203503746502859e-07, - "loss": 1.0714, - "step": 3425 - }, - { - "epoch": 2.5929990539262064, - "grad_norm": 1.9658424854278564, - "learning_rate": 9.169926041106579e-07, - "loss": 1.0231, - "step": 3426 - }, - { - "epoch": 2.593755912961211, - "grad_norm": 1.955154299736023, - "learning_rate": 9.13640688983662e-07, - "loss": 1.0539, - "step": 3427 - }, - { - "epoch": 2.5945127719962158, - "grad_norm": 2.0878820419311523, - "learning_rate": 9.102946314298959e-07, - "loss": 1.0911, - "step": 3428 - }, - { - "epoch": 2.59526963103122, - "grad_norm": 1.9909857511520386, - "learning_rate": 9.069544336061716e-07, - "loss": 1.083, - "step": 3429 - }, - { - "epoch": 2.596026490066225, - "grad_norm": 2.0630910396575928, - "learning_rate": 9.036200976655337e-07, - "loss": 1.0977, - "step": 3430 - }, - { - "epoch": 2.59678334910123, - "grad_norm": 1.982391595840454, - "learning_rate": 9.002916257572411e-07, - "loss": 1.0271, - "step": 3431 - }, - { - "epoch": 2.597540208136235, - "grad_norm": 1.9987069368362427, - "learning_rate": 8.969690200267786e-07, - "loss": 1.0813, - "step": 3432 - }, - { - "epoch": 2.5982970671712393, - "grad_norm": 1.983818531036377, - "learning_rate": 8.936522826158452e-07, - "loss": 1.0776, - "step": 3433 - }, - { - "epoch": 2.599053926206244, - "grad_norm": 1.9349209070205688, - "learning_rate": 8.903414156623622e-07, - "loss": 1.0509, - "step": 3434 - }, - { - "epoch": 2.5998107852412486, - "grad_norm": 2.01790714263916, - "learning_rate": 8.870364213004612e-07, - "loss": 1.0949, - "step": 3435 - }, - { - "epoch": 2.6005676442762535, - "grad_norm": 2.0256693363189697, - "learning_rate": 8.837373016604916e-07, - "loss": 1.0572, - "step": 3436 - }, - { - "epoch": 2.6013245033112584, - "grad_norm": 1.989288568496704, - "learning_rate": 8.804440588690183e-07, - "loss": 1.0321, - "step": 3437 - }, - { - "epoch": 2.602081362346263, - "grad_norm": 2.1254732608795166, - "learning_rate": 8.771566950488107e-07, - "loss": 1.0513, - "step": 3438 - }, - { - "epoch": 2.6028382213812677, - "grad_norm": 2.4187963008880615, - "learning_rate": 8.738752123188587e-07, - "loss": 1.0755, - "step": 3439 - }, - { - "epoch": 2.6035950804162726, - "grad_norm": 2.0207037925720215, - "learning_rate": 8.705996127943503e-07, - "loss": 1.0669, - "step": 3440 - }, - { - "epoch": 2.604351939451277, - "grad_norm": 2.1482834815979004, - "learning_rate": 8.6732989858669e-07, - "loss": 1.0675, - 
"step": 3441 - }, - { - "epoch": 2.605108798486282, - "grad_norm": 1.9809141159057617, - "learning_rate": 8.640660718034855e-07, - "loss": 1.1257, - "step": 3442 - }, - { - "epoch": 2.605865657521287, - "grad_norm": 2.1395535469055176, - "learning_rate": 8.608081345485507e-07, - "loss": 1.1311, - "step": 3443 - }, - { - "epoch": 2.6066225165562913, - "grad_norm": 2.1757044792175293, - "learning_rate": 8.575560889219027e-07, - "loss": 1.0888, - "step": 3444 - }, - { - "epoch": 2.607379375591296, - "grad_norm": 1.8578970432281494, - "learning_rate": 8.543099370197591e-07, - "loss": 1.0527, - "step": 3445 - }, - { - "epoch": 2.6081362346263006, - "grad_norm": 1.966065526008606, - "learning_rate": 8.51069680934539e-07, - "loss": 1.0301, - "step": 3446 - }, - { - "epoch": 2.6088930936613055, - "grad_norm": 1.9428819417953491, - "learning_rate": 8.478353227548625e-07, - "loss": 1.0255, - "step": 3447 - }, - { - "epoch": 2.6096499526963104, - "grad_norm": 1.873252272605896, - "learning_rate": 8.446068645655477e-07, - "loss": 1.092, - "step": 3448 - }, - { - "epoch": 2.6104068117313153, - "grad_norm": 2.0765583515167236, - "learning_rate": 8.413843084476109e-07, - "loss": 1.0985, - "step": 3449 - }, - { - "epoch": 2.6111636707663197, - "grad_norm": 2.0844457149505615, - "learning_rate": 8.381676564782655e-07, - "loss": 1.1138, - "step": 3450 - }, - { - "epoch": 2.6119205298013246, - "grad_norm": 1.9434021711349487, - "learning_rate": 8.349569107309078e-07, - "loss": 1.059, - "step": 3451 - }, - { - "epoch": 2.612677388836329, - "grad_norm": 2.0002236366271973, - "learning_rate": 8.317520732751409e-07, - "loss": 1.0701, - "step": 3452 - }, - { - "epoch": 2.613434247871334, - "grad_norm": 2.0566999912261963, - "learning_rate": 8.285531461767541e-07, - "loss": 1.066, - "step": 3453 - }, - { - "epoch": 2.614191106906339, - "grad_norm": 1.926048994064331, - "learning_rate": 8.253601314977264e-07, - "loss": 1.0811, - "step": 3454 - }, - { - "epoch": 2.6149479659413433, - "grad_norm": 2.086893320083618, - "learning_rate": 8.22173031296225e-07, - "loss": 1.0647, - "step": 3455 - }, - { - "epoch": 2.615704824976348, - "grad_norm": 2.1144938468933105, - "learning_rate": 8.189918476266104e-07, - "loss": 1.0737, - "step": 3456 - }, - { - "epoch": 2.616461684011353, - "grad_norm": 1.8913697004318237, - "learning_rate": 8.158165825394222e-07, - "loss": 1.0621, - "step": 3457 - }, - { - "epoch": 2.6172185430463575, - "grad_norm": 1.850129246711731, - "learning_rate": 8.126472380813851e-07, - "loss": 1.0475, - "step": 3458 - }, - { - "epoch": 2.6179754020813624, - "grad_norm": 1.9463554620742798, - "learning_rate": 8.094838162954142e-07, - "loss": 1.0652, - "step": 3459 - }, - { - "epoch": 2.6187322611163673, - "grad_norm": 1.9536323547363281, - "learning_rate": 8.063263192206013e-07, - "loss": 1.0567, - "step": 3460 - }, - { - "epoch": 2.6194891201513717, - "grad_norm": 1.9918063879013062, - "learning_rate": 8.031747488922231e-07, - "loss": 1.0604, - "step": 3461 - }, - { - "epoch": 2.6202459791863766, - "grad_norm": 2.074878215789795, - "learning_rate": 8.0002910734173e-07, - "loss": 1.0467, - "step": 3462 - }, - { - "epoch": 2.621002838221381, - "grad_norm": 2.0185697078704834, - "learning_rate": 7.968893965967558e-07, - "loss": 1.1014, - "step": 3463 - }, - { - "epoch": 2.621759697256386, - "grad_norm": 1.9658541679382324, - "learning_rate": 7.937556186811127e-07, - "loss": 1.0582, - "step": 3464 - }, - { - "epoch": 2.622516556291391, - "grad_norm": 2.0424704551696777, - "learning_rate": 
7.906277756147835e-07, - "loss": 1.0843, - "step": 3465 - }, - { - "epoch": 2.6232734153263957, - "grad_norm": 2.1158058643341064, - "learning_rate": 7.875058694139282e-07, - "loss": 1.0359, - "step": 3466 - }, - { - "epoch": 2.6240302743614, - "grad_norm": 2.1120176315307617, - "learning_rate": 7.843899020908823e-07, - "loss": 1.0796, - "step": 3467 - }, - { - "epoch": 2.624787133396405, - "grad_norm": 1.910689353942871, - "learning_rate": 7.812798756541483e-07, - "loss": 1.0547, - "step": 3468 - }, - { - "epoch": 2.6255439924314095, - "grad_norm": 2.1395435333251953, - "learning_rate": 7.781757921084019e-07, - "loss": 1.0968, - "step": 3469 - }, - { - "epoch": 2.6263008514664143, - "grad_norm": 1.9301244020462036, - "learning_rate": 7.750776534544889e-07, - "loss": 1.0543, - "step": 3470 - }, - { - "epoch": 2.6270577105014192, - "grad_norm": 1.9667290449142456, - "learning_rate": 7.719854616894243e-07, - "loss": 1.0575, - "step": 3471 - }, - { - "epoch": 2.627814569536424, - "grad_norm": 1.8904736042022705, - "learning_rate": 7.688992188063853e-07, - "loss": 1.0781, - "step": 3472 - }, - { - "epoch": 2.6285714285714286, - "grad_norm": 1.991716980934143, - "learning_rate": 7.658189267947159e-07, - "loss": 1.052, - "step": 3473 - }, - { - "epoch": 2.6293282876064334, - "grad_norm": 2.0166666507720947, - "learning_rate": 7.627445876399259e-07, - "loss": 1.0668, - "step": 3474 - }, - { - "epoch": 2.630085146641438, - "grad_norm": 2.020129919052124, - "learning_rate": 7.596762033236895e-07, - "loss": 1.0539, - "step": 3475 - }, - { - "epoch": 2.6308420056764428, - "grad_norm": 2.1295621395111084, - "learning_rate": 7.566137758238386e-07, - "loss": 1.0264, - "step": 3476 - }, - { - "epoch": 2.6315988647114477, - "grad_norm": 2.1414082050323486, - "learning_rate": 7.53557307114367e-07, - "loss": 1.0476, - "step": 3477 - }, - { - "epoch": 2.632355723746452, - "grad_norm": 1.8784303665161133, - "learning_rate": 7.505067991654335e-07, - "loss": 1.0252, - "step": 3478 - }, - { - "epoch": 2.633112582781457, - "grad_norm": 2.0736515522003174, - "learning_rate": 7.474622539433398e-07, - "loss": 1.0661, - "step": 3479 - }, - { - "epoch": 2.6338694418164614, - "grad_norm": 1.9620708227157593, - "learning_rate": 7.444236734105581e-07, - "loss": 1.0835, - "step": 3480 - }, - { - "epoch": 2.6346263008514663, - "grad_norm": 2.1406285762786865, - "learning_rate": 7.413910595257105e-07, - "loss": 1.0577, - "step": 3481 - }, - { - "epoch": 2.635383159886471, - "grad_norm": 1.9883054494857788, - "learning_rate": 7.383644142435741e-07, - "loss": 1.0546, - "step": 3482 - }, - { - "epoch": 2.636140018921476, - "grad_norm": 1.9472057819366455, - "learning_rate": 7.353437395150799e-07, - "loss": 1.0452, - "step": 3483 - }, - { - "epoch": 2.6368968779564805, - "grad_norm": 2.0464439392089844, - "learning_rate": 7.323290372873055e-07, - "loss": 1.1029, - "step": 3484 - }, - { - "epoch": 2.6376537369914854, - "grad_norm": 1.9990071058273315, - "learning_rate": 7.293203095034839e-07, - "loss": 1.1126, - "step": 3485 - }, - { - "epoch": 2.63841059602649, - "grad_norm": 2.022820234298706, - "learning_rate": 7.263175581029933e-07, - "loss": 1.0625, - "step": 3486 - }, - { - "epoch": 2.6391674550614947, - "grad_norm": 1.835789442062378, - "learning_rate": 7.233207850213639e-07, - "loss": 1.0732, - "step": 3487 - }, - { - "epoch": 2.6399243140964996, - "grad_norm": 1.9170242547988892, - "learning_rate": 7.20329992190268e-07, - "loss": 1.1162, - "step": 3488 - }, - { - "epoch": 2.6406811731315045, - "grad_norm": 
1.9878673553466797, - "learning_rate": 7.173451815375276e-07, - "loss": 1.0664, - "step": 3489 - }, - { - "epoch": 2.641438032166509, - "grad_norm": 2.3943591117858887, - "learning_rate": 7.14366354987102e-07, - "loss": 1.1248, - "step": 3490 - }, - { - "epoch": 2.642194891201514, - "grad_norm": 1.926537275314331, - "learning_rate": 7.113935144591011e-07, - "loss": 1.0654, - "step": 3491 - }, - { - "epoch": 2.6429517502365183, - "grad_norm": 2.132347583770752, - "learning_rate": 7.084266618697722e-07, - "loss": 1.058, - "step": 3492 - }, - { - "epoch": 2.643708609271523, - "grad_norm": 2.0414459705352783, - "learning_rate": 7.054657991315009e-07, - "loss": 1.0699, - "step": 3493 - }, - { - "epoch": 2.644465468306528, - "grad_norm": 2.3885207176208496, - "learning_rate": 7.025109281528162e-07, - "loss": 1.0914, - "step": 3494 - }, - { - "epoch": 2.6452223273415325, - "grad_norm": 2.1995675563812256, - "learning_rate": 6.995620508383816e-07, - "loss": 1.0691, - "step": 3495 - }, - { - "epoch": 2.6459791863765374, - "grad_norm": 1.9995527267456055, - "learning_rate": 6.966191690889987e-07, - "loss": 1.0715, - "step": 3496 - }, - { - "epoch": 2.646736045411542, - "grad_norm": 2.06939435005188, - "learning_rate": 6.936822848016048e-07, - "loss": 1.0522, - "step": 3497 - }, - { - "epoch": 2.6474929044465467, - "grad_norm": 2.001063346862793, - "learning_rate": 6.907513998692701e-07, - "loss": 1.0726, - "step": 3498 - }, - { - "epoch": 2.6482497634815516, - "grad_norm": 2.1571309566497803, - "learning_rate": 6.878265161812005e-07, - "loss": 1.0609, - "step": 3499 - }, - { - "epoch": 2.6490066225165565, - "grad_norm": 2.0034592151641846, - "learning_rate": 6.849076356227285e-07, - "loss": 1.0636, - "step": 3500 - }, - { - "epoch": 2.649763481551561, - "grad_norm": 1.8944875001907349, - "learning_rate": 6.819947600753214e-07, - "loss": 1.061, - "step": 3501 - }, - { - "epoch": 2.650520340586566, - "grad_norm": 1.9522629976272583, - "learning_rate": 6.790878914165723e-07, - "loss": 1.1064, - "step": 3502 - }, - { - "epoch": 2.6512771996215703, - "grad_norm": 1.9700335264205933, - "learning_rate": 6.761870315202072e-07, - "loss": 1.0509, - "step": 3503 - }, - { - "epoch": 2.652034058656575, - "grad_norm": 1.9173399209976196, - "learning_rate": 6.732921822560753e-07, - "loss": 1.0467, - "step": 3504 - }, - { - "epoch": 2.65279091769158, - "grad_norm": 2.1325631141662598, - "learning_rate": 6.704033454901552e-07, - "loss": 1.0547, - "step": 3505 - }, - { - "epoch": 2.653547776726585, - "grad_norm": 1.9540364742279053, - "learning_rate": 6.67520523084541e-07, - "loss": 1.1084, - "step": 3506 - }, - { - "epoch": 2.6543046357615894, - "grad_norm": 2.026878595352173, - "learning_rate": 6.646437168974577e-07, - "loss": 1.0511, - "step": 3507 - }, - { - "epoch": 2.6550614947965943, - "grad_norm": 2.238311290740967, - "learning_rate": 6.617729287832535e-07, - "loss": 1.1053, - "step": 3508 - }, - { - "epoch": 2.6558183538315987, - "grad_norm": 2.0573630332946777, - "learning_rate": 6.589081605923916e-07, - "loss": 1.0377, - "step": 3509 - }, - { - "epoch": 2.6565752128666036, - "grad_norm": 2.0835254192352295, - "learning_rate": 6.56049414171461e-07, - "loss": 1.0403, - "step": 3510 - }, - { - "epoch": 2.6573320719016085, - "grad_norm": 1.888987421989441, - "learning_rate": 6.531966913631649e-07, - "loss": 1.0874, - "step": 3511 - }, - { - "epoch": 2.658088930936613, - "grad_norm": 1.9704266786575317, - "learning_rate": 6.503499940063245e-07, - "loss": 1.0968, - "step": 3512 - }, - { - "epoch": 
2.658845789971618, - "grad_norm": 2.062167167663574, - "learning_rate": 6.475093239358764e-07, - "loss": 1.074, - "step": 3513 - }, - { - "epoch": 2.6596026490066222, - "grad_norm": 2.041229724884033, - "learning_rate": 6.446746829828747e-07, - "loss": 1.0881, - "step": 3514 - }, - { - "epoch": 2.660359508041627, - "grad_norm": 2.0432045459747314, - "learning_rate": 6.41846072974484e-07, - "loss": 1.0497, - "step": 3515 - }, - { - "epoch": 2.661116367076632, - "grad_norm": 2.037137746810913, - "learning_rate": 6.390234957339877e-07, - "loss": 1.1183, - "step": 3516 - }, - { - "epoch": 2.661873226111637, - "grad_norm": 2.0373597145080566, - "learning_rate": 6.362069530807692e-07, - "loss": 1.0721, - "step": 3517 - }, - { - "epoch": 2.6626300851466413, - "grad_norm": 2.2422561645507812, - "learning_rate": 6.333964468303339e-07, - "loss": 1.1072, - "step": 3518 - }, - { - "epoch": 2.6633869441816462, - "grad_norm": 2.035428285598755, - "learning_rate": 6.305919787942921e-07, - "loss": 1.0755, - "step": 3519 - }, - { - "epoch": 2.6641438032166507, - "grad_norm": 2.0684683322906494, - "learning_rate": 6.277935507803559e-07, - "loss": 1.0673, - "step": 3520 - }, - { - "epoch": 2.6649006622516556, - "grad_norm": 2.1325268745422363, - "learning_rate": 6.25001164592354e-07, - "loss": 1.0269, - "step": 3521 - }, - { - "epoch": 2.6656575212866604, - "grad_norm": 1.9109140634536743, - "learning_rate": 6.222148220302141e-07, - "loss": 1.0445, - "step": 3522 - }, - { - "epoch": 2.6664143803216653, - "grad_norm": 2.0545241832733154, - "learning_rate": 6.1943452488997e-07, - "loss": 1.03, - "step": 3523 - }, - { - "epoch": 2.6671712393566698, - "grad_norm": 1.9249234199523926, - "learning_rate": 6.166602749637587e-07, - "loss": 1.0247, - "step": 3524 - }, - { - "epoch": 2.6679280983916747, - "grad_norm": 2.097187042236328, - "learning_rate": 6.138920740398207e-07, - "loss": 1.0982, - "step": 3525 - }, - { - "epoch": 2.668684957426679, - "grad_norm": 2.3547725677490234, - "learning_rate": 6.111299239024957e-07, - "loss": 1.0561, - "step": 3526 - }, - { - "epoch": 2.669441816461684, - "grad_norm": 1.9423364400863647, - "learning_rate": 6.083738263322244e-07, - "loss": 1.0832, - "step": 3527 - }, - { - "epoch": 2.670198675496689, - "grad_norm": 1.9852162599563599, - "learning_rate": 6.056237831055416e-07, - "loss": 1.0746, - "step": 3528 - }, - { - "epoch": 2.6709555345316933, - "grad_norm": 2.095628261566162, - "learning_rate": 6.02879795995085e-07, - "loss": 1.0651, - "step": 3529 - }, - { - "epoch": 2.671712393566698, - "grad_norm": 2.109067916870117, - "learning_rate": 6.001418667695884e-07, - "loss": 1.0847, - "step": 3530 - }, - { - "epoch": 2.672469252601703, - "grad_norm": 1.9949407577514648, - "learning_rate": 5.97409997193879e-07, - "loss": 1.1257, - "step": 3531 - }, - { - "epoch": 2.6732261116367075, - "grad_norm": 2.216343641281128, - "learning_rate": 5.946841890288763e-07, - "loss": 1.0922, - "step": 3532 - }, - { - "epoch": 2.6739829706717124, - "grad_norm": 1.9918749332427979, - "learning_rate": 5.91964444031599e-07, - "loss": 1.0832, - "step": 3533 - }, - { - "epoch": 2.6747398297067173, - "grad_norm": 2.0035977363586426, - "learning_rate": 5.892507639551483e-07, - "loss": 1.1023, - "step": 3534 - }, - { - "epoch": 2.6754966887417218, - "grad_norm": 1.9594407081604004, - "learning_rate": 5.86543150548722e-07, - "loss": 1.0479, - "step": 3535 - }, - { - "epoch": 2.6762535477767266, - "grad_norm": 1.9996135234832764, - "learning_rate": 5.838416055576072e-07, - "loss": 1.0837, - "step": 3536 - 
}, - { - "epoch": 2.677010406811731, - "grad_norm": 2.032686710357666, - "learning_rate": 5.811461307231798e-07, - "loss": 1.0673, - "step": 3537 - }, - { - "epoch": 2.677767265846736, - "grad_norm": 1.7957963943481445, - "learning_rate": 5.784567277829007e-07, - "loss": 1.0672, - "step": 3538 - }, - { - "epoch": 2.678524124881741, - "grad_norm": 1.936874508857727, - "learning_rate": 5.757733984703174e-07, - "loss": 1.1329, - "step": 3539 - }, - { - "epoch": 2.6792809839167457, - "grad_norm": 2.072567939758301, - "learning_rate": 5.730961445150644e-07, - "loss": 1.1066, - "step": 3540 - }, - { - "epoch": 2.68003784295175, - "grad_norm": 1.8656892776489258, - "learning_rate": 5.704249676428575e-07, - "loss": 1.1158, - "step": 3541 - }, - { - "epoch": 2.680794701986755, - "grad_norm": 1.9235533475875854, - "learning_rate": 5.677598695754967e-07, - "loss": 1.0364, - "step": 3542 - }, - { - "epoch": 2.6815515610217595, - "grad_norm": 2.159919261932373, - "learning_rate": 5.651008520308641e-07, - "loss": 1.0813, - "step": 3543 - }, - { - "epoch": 2.6823084200567644, - "grad_norm": 2.093416929244995, - "learning_rate": 5.624479167229225e-07, - "loss": 1.082, - "step": 3544 - }, - { - "epoch": 2.6830652790917693, - "grad_norm": 1.9927400350570679, - "learning_rate": 5.598010653617116e-07, - "loss": 1.0486, - "step": 3545 - }, - { - "epoch": 2.6838221381267737, - "grad_norm": 1.986259937286377, - "learning_rate": 5.571602996533528e-07, - "loss": 1.0532, - "step": 3546 - }, - { - "epoch": 2.6845789971617786, - "grad_norm": 2.1325762271881104, - "learning_rate": 5.54525621300045e-07, - "loss": 1.1321, - "step": 3547 - }, - { - "epoch": 2.6853358561967835, - "grad_norm": 1.9752742052078247, - "learning_rate": 5.518970320000578e-07, - "loss": 1.0752, - "step": 3548 - }, - { - "epoch": 2.686092715231788, - "grad_norm": 1.9965808391571045, - "learning_rate": 5.492745334477438e-07, - "loss": 1.0721, - "step": 3549 - }, - { - "epoch": 2.686849574266793, - "grad_norm": 2.021066427230835, - "learning_rate": 5.466581273335216e-07, - "loss": 1.0819, - "step": 3550 - }, - { - "epoch": 2.6876064333017977, - "grad_norm": 2.0489556789398193, - "learning_rate": 5.440478153438891e-07, - "loss": 1.0542, - "step": 3551 - }, - { - "epoch": 2.688363292336802, - "grad_norm": 2.1207127571105957, - "learning_rate": 5.414435991614129e-07, - "loss": 1.0577, - "step": 3552 - }, - { - "epoch": 2.689120151371807, - "grad_norm": 2.099400281906128, - "learning_rate": 5.388454804647312e-07, - "loss": 1.0527, - "step": 3553 - }, - { - "epoch": 2.6898770104068115, - "grad_norm": 2.165239095687866, - "learning_rate": 5.362534609285534e-07, - "loss": 1.0728, - "step": 3554 - }, - { - "epoch": 2.6906338694418164, - "grad_norm": 1.964612364768982, - "learning_rate": 5.336675422236547e-07, - "loss": 1.1085, - "step": 3555 - }, - { - "epoch": 2.6913907284768213, - "grad_norm": 2.3169875144958496, - "learning_rate": 5.31087726016876e-07, - "loss": 1.0873, - "step": 3556 - }, - { - "epoch": 2.692147587511826, - "grad_norm": 2.005558490753174, - "learning_rate": 5.285140139711306e-07, - "loss": 1.0435, - "step": 3557 - }, - { - "epoch": 2.6929044465468306, - "grad_norm": 1.9185731410980225, - "learning_rate": 5.259464077453933e-07, - "loss": 1.1144, - "step": 3558 - }, - { - "epoch": 2.6936613055818355, - "grad_norm": 1.933445930480957, - "learning_rate": 5.233849089947034e-07, - "loss": 1.0526, - "step": 3559 - }, - { - "epoch": 2.69441816461684, - "grad_norm": 2.1504805088043213, - "learning_rate": 5.208295193701673e-07, - "loss": 1.0822, 
- "step": 3560 - }, - { - "epoch": 2.695175023651845, - "grad_norm": 2.1270816326141357, - "learning_rate": 5.182802405189443e-07, - "loss": 1.0848, - "step": 3561 - }, - { - "epoch": 2.6959318826868497, - "grad_norm": 2.112243890762329, - "learning_rate": 5.157370740842649e-07, - "loss": 1.0501, - "step": 3562 - }, - { - "epoch": 2.6966887417218546, - "grad_norm": 2.2307591438293457, - "learning_rate": 5.132000217054134e-07, - "loss": 1.1388, - "step": 3563 - }, - { - "epoch": 2.697445600756859, - "grad_norm": 2.053459405899048, - "learning_rate": 5.106690850177358e-07, - "loss": 1.0846, - "step": 3564 - }, - { - "epoch": 2.698202459791864, - "grad_norm": 2.0699667930603027, - "learning_rate": 5.08144265652635e-07, - "loss": 1.0567, - "step": 3565 - }, - { - "epoch": 2.6989593188268683, - "grad_norm": 2.0828826427459717, - "learning_rate": 5.056255652375729e-07, - "loss": 1.0729, - "step": 3566 - }, - { - "epoch": 2.6997161778618732, - "grad_norm": 1.9452773332595825, - "learning_rate": 5.031129853960639e-07, - "loss": 1.0788, - "step": 3567 - }, - { - "epoch": 2.700473036896878, - "grad_norm": 2.0344362258911133, - "learning_rate": 5.006065277476771e-07, - "loss": 1.054, - "step": 3568 - }, - { - "epoch": 2.7012298959318826, - "grad_norm": 1.900039792060852, - "learning_rate": 4.981061939080384e-07, - "loss": 1.0262, - "step": 3569 - }, - { - "epoch": 2.7019867549668874, - "grad_norm": 1.9985625743865967, - "learning_rate": 4.956119854888261e-07, - "loss": 1.0899, - "step": 3570 - }, - { - "epoch": 2.702743614001892, - "grad_norm": 2.088229179382324, - "learning_rate": 4.931239040977678e-07, - "loss": 1.1423, - "step": 3571 - }, - { - "epoch": 2.703500473036897, - "grad_norm": 1.9471749067306519, - "learning_rate": 4.90641951338641e-07, - "loss": 1.0762, - "step": 3572 - }, - { - "epoch": 2.7042573320719017, - "grad_norm": 1.8323947191238403, - "learning_rate": 4.88166128811277e-07, - "loss": 1.0531, - "step": 3573 - }, - { - "epoch": 2.7050141911069066, - "grad_norm": 1.9754245281219482, - "learning_rate": 4.856964381115542e-07, - "loss": 1.1185, - "step": 3574 - }, - { - "epoch": 2.705771050141911, - "grad_norm": 1.9460619688034058, - "learning_rate": 4.83232880831394e-07, - "loss": 1.1218, - "step": 3575 - }, - { - "epoch": 2.706527909176916, - "grad_norm": 2.2051377296447754, - "learning_rate": 4.807754585587696e-07, - "loss": 1.0773, - "step": 3576 - }, - { - "epoch": 2.7072847682119203, - "grad_norm": 1.9983853101730347, - "learning_rate": 4.783241728776997e-07, - "loss": 1.0325, - "step": 3577 - }, - { - "epoch": 2.708041627246925, - "grad_norm": 1.9599753618240356, - "learning_rate": 4.7587902536824234e-07, - "loss": 1.0701, - "step": 3578 - }, - { - "epoch": 2.70879848628193, - "grad_norm": 2.0052897930145264, - "learning_rate": 4.7344001760650454e-07, - "loss": 1.0672, - "step": 3579 - }, - { - "epoch": 2.709555345316935, - "grad_norm": 3.11828351020813, - "learning_rate": 4.710071511646324e-07, - "loss": 1.0932, - "step": 3580 - }, - { - "epoch": 2.7103122043519394, - "grad_norm": 2.1355981826782227, - "learning_rate": 4.685804276108169e-07, - "loss": 1.1196, - "step": 3581 - }, - { - "epoch": 2.7110690633869443, - "grad_norm": 2.2099850177764893, - "learning_rate": 4.6615984850928456e-07, - "loss": 1.1028, - "step": 3582 - }, - { - "epoch": 2.7118259224219488, - "grad_norm": 1.9474663734436035, - "learning_rate": 4.637454154203033e-07, - "loss": 1.07, - "step": 3583 - }, - { - "epoch": 2.7125827814569536, - "grad_norm": 2.1069188117980957, - "learning_rate": 
4.613371299001815e-07, - "loss": 1.0899, - "step": 3584 - }, - { - "epoch": 2.7133396404919585, - "grad_norm": 2.008517265319824, - "learning_rate": 4.58934993501263e-07, - "loss": 1.0885, - "step": 3585 - }, - { - "epoch": 2.714096499526963, - "grad_norm": 1.88406503200531, - "learning_rate": 4.5653900777192763e-07, - "loss": 1.0659, - "step": 3586 - }, - { - "epoch": 2.714853358561968, - "grad_norm": 2.1920740604400635, - "learning_rate": 4.5414917425659094e-07, - "loss": 1.1038, - "step": 3587 - }, - { - "epoch": 2.7156102175969723, - "grad_norm": 2.047375440597534, - "learning_rate": 4.5176549449570765e-07, - "loss": 1.0542, - "step": 3588 - }, - { - "epoch": 2.716367076631977, - "grad_norm": 1.9768850803375244, - "learning_rate": 4.4938797002575485e-07, - "loss": 1.0511, - "step": 3589 - }, - { - "epoch": 2.717123935666982, - "grad_norm": 2.005725145339966, - "learning_rate": 4.4701660237925116e-07, - "loss": 1.0859, - "step": 3590 - }, - { - "epoch": 2.717880794701987, - "grad_norm": 2.0299482345581055, - "learning_rate": 4.446513930847431e-07, - "loss": 1.052, - "step": 3591 - }, - { - "epoch": 2.7186376537369914, - "grad_norm": 2.217197895050049, - "learning_rate": 4.4229234366681054e-07, - "loss": 1.083, - "step": 3592 - }, - { - "epoch": 2.7193945127719963, - "grad_norm": 2.225231170654297, - "learning_rate": 4.399394556460618e-07, - "loss": 1.1048, - "step": 3593 - }, - { - "epoch": 2.7201513718070007, - "grad_norm": 2.035879135131836, - "learning_rate": 4.375927305391286e-07, - "loss": 1.0064, - "step": 3594 - }, - { - "epoch": 2.7209082308420056, - "grad_norm": 2.046074628829956, - "learning_rate": 4.352521698586783e-07, - "loss": 1.0649, - "step": 3595 - }, - { - "epoch": 2.7216650898770105, - "grad_norm": 2.068490743637085, - "learning_rate": 4.329177751133964e-07, - "loss": 1.0509, - "step": 3596 - }, - { - "epoch": 2.7224219489120154, - "grad_norm": 2.191215991973877, - "learning_rate": 4.305895478079998e-07, - "loss": 1.0413, - "step": 3597 - }, - { - "epoch": 2.72317880794702, - "grad_norm": 2.310241937637329, - "learning_rate": 4.2826748944323e-07, - "loss": 1.0864, - "step": 3598 - }, - { - "epoch": 2.7239356669820247, - "grad_norm": 2.196274757385254, - "learning_rate": 4.2595160151584996e-07, - "loss": 1.0302, - "step": 3599 - }, - { - "epoch": 2.724692526017029, - "grad_norm": 2.0941972732543945, - "learning_rate": 4.2364188551864284e-07, - "loss": 1.0968, - "step": 3600 - }, - { - "epoch": 2.725449385052034, - "grad_norm": 2.1524224281311035, - "learning_rate": 4.213383429404197e-07, - "loss": 1.0739, - "step": 3601 - }, - { - "epoch": 2.726206244087039, - "grad_norm": 2.400557518005371, - "learning_rate": 4.190409752660077e-07, - "loss": 1.1176, - "step": 3602 - }, - { - "epoch": 2.7269631031220434, - "grad_norm": 2.0198590755462646, - "learning_rate": 4.16749783976255e-07, - "loss": 1.0351, - "step": 3603 - }, - { - "epoch": 2.7277199621570483, - "grad_norm": 2.151195526123047, - "learning_rate": 4.144647705480291e-07, - "loss": 1.0867, - "step": 3604 - }, - { - "epoch": 2.7284768211920527, - "grad_norm": 1.927239179611206, - "learning_rate": 4.1218593645421344e-07, - "loss": 1.0605, - "step": 3605 - }, - { - "epoch": 2.7292336802270576, - "grad_norm": 2.175260066986084, - "learning_rate": 4.099132831637103e-07, - "loss": 1.0312, - "step": 3606 - }, - { - "epoch": 2.7299905392620625, - "grad_norm": 2.2161762714385986, - "learning_rate": 4.0764681214143794e-07, - "loss": 1.0217, - "step": 3607 - }, - { - "epoch": 2.7307473982970674, - "grad_norm": 
2.06466007232666, - "learning_rate": 4.053865248483281e-07, - "loss": 1.0851, - "step": 3608 - }, - { - "epoch": 2.731504257332072, - "grad_norm": 2.1965982913970947, - "learning_rate": 4.031324227413297e-07, - "loss": 1.0758, - "step": 3609 - }, - { - "epoch": 2.7322611163670767, - "grad_norm": 2.31892728805542, - "learning_rate": 4.008845072734016e-07, - "loss": 1.1159, - "step": 3610 - }, - { - "epoch": 2.733017975402081, - "grad_norm": 2.0228688716888428, - "learning_rate": 3.986427798935131e-07, - "loss": 1.0769, - "step": 3611 - }, - { - "epoch": 2.733774834437086, - "grad_norm": 2.0157992839813232, - "learning_rate": 3.964072420466503e-07, - "loss": 1.0597, - "step": 3612 - }, - { - "epoch": 2.734531693472091, - "grad_norm": 1.9818907976150513, - "learning_rate": 3.9417789517380527e-07, - "loss": 1.0732, - "step": 3613 - }, - { - "epoch": 2.735288552507096, - "grad_norm": 2.1533520221710205, - "learning_rate": 3.919547407119824e-07, - "loss": 1.063, - "step": 3614 - }, - { - "epoch": 2.7360454115421002, - "grad_norm": 2.073683738708496, - "learning_rate": 3.897377800941943e-07, - "loss": 1.0551, - "step": 3615 - }, - { - "epoch": 2.736802270577105, - "grad_norm": 2.021272897720337, - "learning_rate": 3.875270147494558e-07, - "loss": 1.1027, - "step": 3616 - }, - { - "epoch": 2.7375591296121096, - "grad_norm": 2.308957099914551, - "learning_rate": 3.853224461027956e-07, - "loss": 1.076, - "step": 3617 - }, - { - "epoch": 2.7383159886471145, - "grad_norm": 2.239806652069092, - "learning_rate": 3.8312407557524466e-07, - "loss": 1.0998, - "step": 3618 - }, - { - "epoch": 2.7390728476821193, - "grad_norm": 2.1331143379211426, - "learning_rate": 3.8093190458383777e-07, - "loss": 1.1259, - "step": 3619 - }, - { - "epoch": 2.739829706717124, - "grad_norm": 2.0615665912628174, - "learning_rate": 3.7874593454161647e-07, - "loss": 1.0191, - "step": 3620 - }, - { - "epoch": 2.7405865657521287, - "grad_norm": 1.9834305047988892, - "learning_rate": 3.7656616685762473e-07, - "loss": 1.0553, - "step": 3621 - }, - { - "epoch": 2.7413434247871336, - "grad_norm": 2.1964480876922607, - "learning_rate": 3.7439260293690597e-07, - "loss": 1.0388, - "step": 3622 - }, - { - "epoch": 2.742100283822138, - "grad_norm": 1.9631261825561523, - "learning_rate": 3.722252441805057e-07, - "loss": 1.0515, - "step": 3623 - }, - { - "epoch": 2.742857142857143, - "grad_norm": 2.0425281524658203, - "learning_rate": 3.7006409198547004e-07, - "loss": 1.0117, - "step": 3624 - }, - { - "epoch": 2.7436140018921478, - "grad_norm": 2.2030279636383057, - "learning_rate": 3.6790914774484625e-07, - "loss": 1.0627, - "step": 3625 - }, - { - "epoch": 2.744370860927152, - "grad_norm": 2.0677294731140137, - "learning_rate": 3.6576041284767873e-07, - "loss": 1.0504, - "step": 3626 - }, - { - "epoch": 2.745127719962157, - "grad_norm": 1.951145887374878, - "learning_rate": 3.6361788867900865e-07, - "loss": 1.0804, - "step": 3627 - }, - { - "epoch": 2.7458845789971615, - "grad_norm": 2.083099126815796, - "learning_rate": 3.614815766198731e-07, - "loss": 1.0902, - "step": 3628 - }, - { - "epoch": 2.7466414380321664, - "grad_norm": 2.0568675994873047, - "learning_rate": 3.593514780473093e-07, - "loss": 1.0659, - "step": 3629 - }, - { - "epoch": 2.7473982970671713, - "grad_norm": 2.0525779724121094, - "learning_rate": 3.572275943343428e-07, - "loss": 1.0885, - "step": 3630 - }, - { - "epoch": 2.748155156102176, - "grad_norm": 2.105832576751709, - "learning_rate": 3.55109926849998e-07, - "loss": 1.0754, - "step": 3631 - }, - { - "epoch": 
2.7489120151371806, - "grad_norm": 2.1376736164093018, - "learning_rate": 3.5299847695929306e-07, - "loss": 1.1257, - "step": 3632 - }, - { - "epoch": 2.7496688741721855, - "grad_norm": 1.8944578170776367, - "learning_rate": 3.508932460232331e-07, - "loss": 1.0465, - "step": 3633 - }, - { - "epoch": 2.75042573320719, - "grad_norm": 2.0414884090423584, - "learning_rate": 3.4879423539882017e-07, - "loss": 1.0621, - "step": 3634 - }, - { - "epoch": 2.751182592242195, - "grad_norm": 2.1808700561523438, - "learning_rate": 3.467014464390431e-07, - "loss": 1.0513, - "step": 3635 - }, - { - "epoch": 2.7519394512771997, - "grad_norm": 2.060415506362915, - "learning_rate": 3.446148804928836e-07, - "loss": 1.0927, - "step": 3636 - }, - { - "epoch": 2.752696310312204, - "grad_norm": 1.9510079622268677, - "learning_rate": 3.425345389053098e-07, - "loss": 1.0684, - "step": 3637 - }, - { - "epoch": 2.753453169347209, - "grad_norm": 1.9349720478057861, - "learning_rate": 3.4046042301727504e-07, - "loss": 1.0437, - "step": 3638 - }, - { - "epoch": 2.754210028382214, - "grad_norm": 1.9200588464736938, - "learning_rate": 3.383925341657259e-07, - "loss": 1.0417, - "step": 3639 - }, - { - "epoch": 2.7549668874172184, - "grad_norm": 1.9135462045669556, - "learning_rate": 3.363308736835918e-07, - "loss": 1.0593, - "step": 3640 - }, - { - "epoch": 2.7557237464522233, - "grad_norm": 2.030207633972168, - "learning_rate": 3.342754428997865e-07, - "loss": 1.0311, - "step": 3641 - }, - { - "epoch": 2.756480605487228, - "grad_norm": 2.0563161373138428, - "learning_rate": 3.3222624313920995e-07, - "loss": 1.1101, - "step": 3642 - }, - { - "epoch": 2.7572374645222326, - "grad_norm": 2.1681125164031982, - "learning_rate": 3.301832757227478e-07, - "loss": 1.1007, - "step": 3643 - }, - { - "epoch": 2.7579943235572375, - "grad_norm": 1.9184566736221313, - "learning_rate": 3.281465419672603e-07, - "loss": 1.0738, - "step": 3644 - }, - { - "epoch": 2.758751182592242, - "grad_norm": 2.2246665954589844, - "learning_rate": 3.261160431855982e-07, - "loss": 1.0967, - "step": 3645 - }, - { - "epoch": 2.759508041627247, - "grad_norm": 2.213003396987915, - "learning_rate": 3.240917806865891e-07, - "loss": 1.1179, - "step": 3646 - }, - { - "epoch": 2.7602649006622517, - "grad_norm": 2.001859426498413, - "learning_rate": 3.2207375577504196e-07, - "loss": 1.0601, - "step": 3647 - }, - { - "epoch": 2.7610217596972566, - "grad_norm": 1.9976732730865479, - "learning_rate": 3.2006196975174716e-07, - "loss": 1.0809, - "step": 3648 - }, - { - "epoch": 2.761778618732261, - "grad_norm": 2.0903263092041016, - "learning_rate": 3.1805642391346757e-07, - "loss": 1.1196, - "step": 3649 - }, - { - "epoch": 2.762535477767266, - "grad_norm": 2.2454066276550293, - "learning_rate": 3.160571195529498e-07, - "loss": 1.1021, - "step": 3650 - }, - { - "epoch": 2.7632923368022704, - "grad_norm": 2.1280694007873535, - "learning_rate": 3.1406405795891286e-07, - "loss": 1.1027, - "step": 3651 - }, - { - "epoch": 2.7640491958372753, - "grad_norm": 2.1310126781463623, - "learning_rate": 3.1207724041605493e-07, - "loss": 0.9978, - "step": 3652 - }, - { - "epoch": 2.76480605487228, - "grad_norm": 2.2121293544769287, - "learning_rate": 3.1009666820505004e-07, - "loss": 1.0708, - "step": 3653 - }, - { - "epoch": 2.765562913907285, - "grad_norm": 2.256673812866211, - "learning_rate": 3.081223426025437e-07, - "loss": 1.094, - "step": 3654 - }, - { - "epoch": 2.7663197729422895, - "grad_norm": 2.2821056842803955, - "learning_rate": 3.0615426488115385e-07, - "loss": 
1.0542, - "step": 3655 - }, - { - "epoch": 2.7670766319772944, - "grad_norm": 2.1040828227996826, - "learning_rate": 3.0419243630947764e-07, - "loss": 1.0439, - "step": 3656 - }, - { - "epoch": 2.767833491012299, - "grad_norm": 2.050218343734741, - "learning_rate": 3.022368581520758e-07, - "loss": 1.0747, - "step": 3657 - }, - { - "epoch": 2.7685903500473037, - "grad_norm": 1.962795376777649, - "learning_rate": 3.0028753166948504e-07, - "loss": 1.1227, - "step": 3658 - }, - { - "epoch": 2.7693472090823086, - "grad_norm": 2.256727933883667, - "learning_rate": 2.983444581182144e-07, - "loss": 1.123, - "step": 3659 - }, - { - "epoch": 2.770104068117313, - "grad_norm": 2.0236082077026367, - "learning_rate": 2.964076387507367e-07, - "loss": 1.0793, - "step": 3660 - }, - { - "epoch": 2.770860927152318, - "grad_norm": 2.2242016792297363, - "learning_rate": 2.944770748154961e-07, - "loss": 1.0849, - "step": 3661 - }, - { - "epoch": 2.7716177861873224, - "grad_norm": 1.8702110052108765, - "learning_rate": 2.9255276755690594e-07, - "loss": 1.0598, - "step": 3662 - }, - { - "epoch": 2.7723746452223272, - "grad_norm": 2.0809333324432373, - "learning_rate": 2.9063471821534544e-07, - "loss": 1.1008, - "step": 3663 - }, - { - "epoch": 2.773131504257332, - "grad_norm": 2.0802369117736816, - "learning_rate": 2.8872292802715856e-07, - "loss": 1.0757, - "step": 3664 - }, - { - "epoch": 2.773888363292337, - "grad_norm": 2.361516237258911, - "learning_rate": 2.868173982246573e-07, - "loss": 1.1079, - "step": 3665 - }, - { - "epoch": 2.7746452223273415, - "grad_norm": 2.069173574447632, - "learning_rate": 2.8491813003611614e-07, - "loss": 1.0559, - "step": 3666 - }, - { - "epoch": 2.7754020813623463, - "grad_norm": 1.9263156652450562, - "learning_rate": 2.830251246857745e-07, - "loss": 1.054, - "step": 3667 - }, - { - "epoch": 2.776158940397351, - "grad_norm": 1.9880831241607666, - "learning_rate": 2.811383833938352e-07, - "loss": 1.0915, - "step": 3668 - }, - { - "epoch": 2.7769157994323557, - "grad_norm": 1.977330207824707, - "learning_rate": 2.7925790737646375e-07, - "loss": 1.0582, - "step": 3669 - }, - { - "epoch": 2.7776726584673606, - "grad_norm": 2.2954440116882324, - "learning_rate": 2.7738369784578694e-07, - "loss": 1.093, - "step": 3670 - }, - { - "epoch": 2.7784295175023654, - "grad_norm": 1.9425572156906128, - "learning_rate": 2.755157560098875e-07, - "loss": 1.0558, - "step": 3671 - }, - { - "epoch": 2.77918637653737, - "grad_norm": 1.9914302825927734, - "learning_rate": 2.736540830728152e-07, - "loss": 1.1168, - "step": 3672 - }, - { - "epoch": 2.7799432355723748, - "grad_norm": 2.00032114982605, - "learning_rate": 2.717986802345765e-07, - "loss": 1.0692, - "step": 3673 - }, - { - "epoch": 2.780700094607379, - "grad_norm": 2.004713535308838, - "learning_rate": 2.6994954869113416e-07, - "loss": 1.0587, - "step": 3674 - }, - { - "epoch": 2.781456953642384, - "grad_norm": 2.0385048389434814, - "learning_rate": 2.6810668963441194e-07, - "loss": 1.064, - "step": 3675 - }, - { - "epoch": 2.782213812677389, - "grad_norm": 1.966386079788208, - "learning_rate": 2.662701042522874e-07, - "loss": 1.0086, - "step": 3676 - }, - { - "epoch": 2.7829706717123934, - "grad_norm": 2.2672669887542725, - "learning_rate": 2.644397937285963e-07, - "loss": 1.0867, - "step": 3677 - }, - { - "epoch": 2.7837275307473983, - "grad_norm": 1.9181667566299438, - "learning_rate": 2.626157592431278e-07, - "loss": 1.0969, - "step": 3678 - }, - { - "epoch": 2.7844843897824028, - "grad_norm": 2.0945870876312256, - "learning_rate": 
2.607980019716272e-07, - "loss": 1.0493, - "step": 3679 - }, - { - "epoch": 2.7852412488174076, - "grad_norm": 1.9859826564788818, - "learning_rate": 2.589865230857932e-07, - "loss": 1.0695, - "step": 3680 - }, - { - "epoch": 2.7859981078524125, - "grad_norm": 1.9504257440567017, - "learning_rate": 2.5718132375327933e-07, - "loss": 1.0653, - "step": 3681 - }, - { - "epoch": 2.7867549668874174, - "grad_norm": 1.9905445575714111, - "learning_rate": 2.5538240513768625e-07, - "loss": 1.0907, - "step": 3682 - }, - { - "epoch": 2.787511825922422, - "grad_norm": 2.076355457305908, - "learning_rate": 2.535897683985702e-07, - "loss": 1.0448, - "step": 3683 - }, - { - "epoch": 2.7882686849574267, - "grad_norm": 1.986864447593689, - "learning_rate": 2.518034146914401e-07, - "loss": 1.0296, - "step": 3684 - }, - { - "epoch": 2.789025543992431, - "grad_norm": 1.9109041690826416, - "learning_rate": 2.5002334516774865e-07, - "loss": 1.0455, - "step": 3685 - }, - { - "epoch": 2.789782403027436, - "grad_norm": 2.183528423309326, - "learning_rate": 2.482495609749042e-07, - "loss": 1.0824, - "step": 3686 - }, - { - "epoch": 2.790539262062441, - "grad_norm": 2.090740203857422, - "learning_rate": 2.4648206325626e-07, - "loss": 1.0755, - "step": 3687 - }, - { - "epoch": 2.791296121097446, - "grad_norm": 2.1614151000976562, - "learning_rate": 2.447208531511184e-07, - "loss": 1.0617, - "step": 3688 - }, - { - "epoch": 2.7920529801324503, - "grad_norm": 1.9354277849197388, - "learning_rate": 2.429659317947277e-07, - "loss": 1.0702, - "step": 3689 - }, - { - "epoch": 2.792809839167455, - "grad_norm": 2.077448606491089, - "learning_rate": 2.412173003182842e-07, - "loss": 1.0656, - "step": 3690 - }, - { - "epoch": 2.7935666982024596, - "grad_norm": 1.9370477199554443, - "learning_rate": 2.394749598489302e-07, - "loss": 1.1324, - "step": 3691 - }, - { - "epoch": 2.7943235572374645, - "grad_norm": 2.0902650356292725, - "learning_rate": 2.3773891150975041e-07, - "loss": 1.1173, - "step": 3692 - }, - { - "epoch": 2.7950804162724694, - "grad_norm": 2.1968994140625, - "learning_rate": 2.3600915641977443e-07, - "loss": 1.1001, - "step": 3693 - }, - { - "epoch": 2.795837275307474, - "grad_norm": 1.9441262483596802, - "learning_rate": 2.342856956939765e-07, - "loss": 1.0932, - "step": 3694 - }, - { - "epoch": 2.7965941343424787, - "grad_norm": 2.0278730392456055, - "learning_rate": 2.3256853044327348e-07, - "loss": 1.1073, - "step": 3695 - }, - { - "epoch": 2.7973509933774836, - "grad_norm": 1.9890429973602295, - "learning_rate": 2.308576617745247e-07, - "loss": 1.0403, - "step": 3696 - }, - { - "epoch": 2.798107852412488, - "grad_norm": 1.9365586042404175, - "learning_rate": 2.2915309079052886e-07, - "loss": 1.0604, - "step": 3697 - }, - { - "epoch": 2.798864711447493, - "grad_norm": 2.051670789718628, - "learning_rate": 2.2745481859002917e-07, - "loss": 1.0739, - "step": 3698 - }, - { - "epoch": 2.799621570482498, - "grad_norm": 1.945073127746582, - "learning_rate": 2.2576284626770157e-07, - "loss": 1.0621, - "step": 3699 - }, - { - "epoch": 2.8003784295175023, - "grad_norm": 2.0430312156677246, - "learning_rate": 2.2407717491416676e-07, - "loss": 1.058, - "step": 3700 - }, - { - "epoch": 2.801135288552507, - "grad_norm": 2.074920177459717, - "learning_rate": 2.2239780561598455e-07, - "loss": 1.0765, - "step": 3701 - }, - { - "epoch": 2.8018921475875116, - "grad_norm": 2.323629140853882, - "learning_rate": 2.2072473945564961e-07, - "loss": 1.1025, - "step": 3702 - }, - { - "epoch": 2.8026490066225165, - "grad_norm": 
1.8663524389266968, - "learning_rate": 2.1905797751159689e-07, - "loss": 1.0688, - "step": 3703 - }, - { - "epoch": 2.8034058656575214, - "grad_norm": 2.141047239303589, - "learning_rate": 2.1739752085819388e-07, - "loss": 1.0787, - "step": 3704 - }, - { - "epoch": 2.8041627246925263, - "grad_norm": 2.179725408554077, - "learning_rate": 2.15743370565744e-07, - "loss": 1.0848, - "step": 3705 - }, - { - "epoch": 2.8049195837275307, - "grad_norm": 2.0025246143341064, - "learning_rate": 2.1409552770048975e-07, - "loss": 1.0256, - "step": 3706 - }, - { - "epoch": 2.8056764427625356, - "grad_norm": 2.1321537494659424, - "learning_rate": 2.124539933246042e-07, - "loss": 1.1045, - "step": 3707 - }, - { - "epoch": 2.80643330179754, - "grad_norm": 2.0465590953826904, - "learning_rate": 2.108187684961972e-07, - "loss": 1.1277, - "step": 3708 - }, - { - "epoch": 2.807190160832545, - "grad_norm": 2.1039795875549316, - "learning_rate": 2.091898542693078e-07, - "loss": 1.0712, - "step": 3709 - }, - { - "epoch": 2.80794701986755, - "grad_norm": 2.3281686305999756, - "learning_rate": 2.0756725169391007e-07, - "loss": 1.1043, - "step": 3710 - }, - { - "epoch": 2.8087038789025542, - "grad_norm": 2.027113914489746, - "learning_rate": 2.0595096181591037e-07, - "loss": 1.1017, - "step": 3711 - }, - { - "epoch": 2.809460737937559, - "grad_norm": 2.1701509952545166, - "learning_rate": 2.04340985677141e-07, - "loss": 1.0812, - "step": 3712 - }, - { - "epoch": 2.810217596972564, - "grad_norm": 2.0328516960144043, - "learning_rate": 2.0273732431537025e-07, - "loss": 1.0951, - "step": 3713 - }, - { - "epoch": 2.8109744560075685, - "grad_norm": 2.0888283252716064, - "learning_rate": 2.0113997876429446e-07, - "loss": 1.1183, - "step": 3714 - }, - { - "epoch": 2.8117313150425733, - "grad_norm": 1.8546501398086548, - "learning_rate": 1.9954895005353692e-07, - "loss": 1.0742, - "step": 3715 - }, - { - "epoch": 2.8124881740775782, - "grad_norm": 1.985583782196045, - "learning_rate": 1.9796423920865021e-07, - "loss": 1.1027, - "step": 3716 - }, - { - "epoch": 2.8132450331125827, - "grad_norm": 2.0510141849517822, - "learning_rate": 1.9638584725111498e-07, - "loss": 1.0609, - "step": 3717 - }, - { - "epoch": 2.8140018921475876, - "grad_norm": 2.359945058822632, - "learning_rate": 1.9481377519834112e-07, - "loss": 1.0421, - "step": 3718 - }, - { - "epoch": 2.814758751182592, - "grad_norm": 2.107235908508301, - "learning_rate": 1.9324802406365883e-07, - "loss": 1.0114, - "step": 3719 - }, - { - "epoch": 2.815515610217597, - "grad_norm": 1.9575122594833374, - "learning_rate": 1.9168859485632866e-07, - "loss": 1.0763, - "step": 3720 - }, - { - "epoch": 2.8162724692526018, - "grad_norm": 2.15492582321167, - "learning_rate": 1.901354885815348e-07, - "loss": 1.0527, - "step": 3721 - }, - { - "epoch": 2.8170293282876067, - "grad_norm": 2.049591302871704, - "learning_rate": 1.8858870624038632e-07, - "loss": 1.0314, - "step": 3722 - }, - { - "epoch": 2.817786187322611, - "grad_norm": 2.623854875564575, - "learning_rate": 1.8704824882991584e-07, - "loss": 1.0589, - "step": 3723 - }, - { - "epoch": 2.818543046357616, - "grad_norm": 1.8997153043746948, - "learning_rate": 1.8551411734307744e-07, - "loss": 1.0561, - "step": 3724 - }, - { - "epoch": 2.8192999053926204, - "grad_norm": 2.2084269523620605, - "learning_rate": 1.8398631276875118e-07, - "loss": 1.0703, - "step": 3725 - }, - { - "epoch": 2.8200567644276253, - "grad_norm": 1.8947069644927979, - "learning_rate": 1.82464836091734e-07, - "loss": 1.0454, - "step": 3726 - }, - { - 
"epoch": 2.82081362346263, - "grad_norm": 1.8694313764572144, - "learning_rate": 1.8094968829274663e-07, - "loss": 1.0885, - "step": 3727 - }, - { - "epoch": 2.821570482497635, - "grad_norm": 2.0910801887512207, - "learning_rate": 1.7944087034843233e-07, - "loss": 1.0546, - "step": 3728 - }, - { - "epoch": 2.8223273415326395, - "grad_norm": 1.8177095651626587, - "learning_rate": 1.7793838323135016e-07, - "loss": 1.0405, - "step": 3729 - }, - { - "epoch": 2.8230842005676444, - "grad_norm": 2.050400733947754, - "learning_rate": 1.7644222790998186e-07, - "loss": 1.0905, - "step": 3730 - }, - { - "epoch": 2.823841059602649, - "grad_norm": 1.9098093509674072, - "learning_rate": 1.7495240534872614e-07, - "loss": 1.031, - "step": 3731 - }, - { - "epoch": 2.8245979186376537, - "grad_norm": 2.1355783939361572, - "learning_rate": 1.734689165078998e-07, - "loss": 1.0944, - "step": 3732 - }, - { - "epoch": 2.8253547776726586, - "grad_norm": 1.9840859174728394, - "learning_rate": 1.7199176234373553e-07, - "loss": 0.9905, - "step": 3733 - }, - { - "epoch": 2.826111636707663, - "grad_norm": 2.0721471309661865, - "learning_rate": 1.7052094380838532e-07, - "loss": 1.059, - "step": 3734 - }, - { - "epoch": 2.826868495742668, - "grad_norm": 2.345816135406494, - "learning_rate": 1.69056461849917e-07, - "loss": 1.0611, - "step": 3735 - }, - { - "epoch": 2.8276253547776724, - "grad_norm": 2.2599689960479736, - "learning_rate": 1.675983174123143e-07, - "loss": 1.0965, - "step": 3736 - }, - { - "epoch": 2.8283822138126773, - "grad_norm": 2.1439452171325684, - "learning_rate": 1.6614651143547243e-07, - "loss": 1.0312, - "step": 3737 - }, - { - "epoch": 2.829139072847682, - "grad_norm": 2.0652458667755127, - "learning_rate": 1.647010448552047e-07, - "loss": 1.0469, - "step": 3738 - }, - { - "epoch": 2.829895931882687, - "grad_norm": 2.0724799633026123, - "learning_rate": 1.63261918603237e-07, - "loss": 1.1211, - "step": 3739 - }, - { - "epoch": 2.8306527909176915, - "grad_norm": 1.8345634937286377, - "learning_rate": 1.618291336072078e-07, - "loss": 1.0573, - "step": 3740 - }, - { - "epoch": 2.8314096499526964, - "grad_norm": 1.9076229333877563, - "learning_rate": 1.6040269079066806e-07, - "loss": 1.0767, - "step": 3741 - }, - { - "epoch": 2.832166508987701, - "grad_norm": 1.9758639335632324, - "learning_rate": 1.5898259107308255e-07, - "loss": 1.0678, - "step": 3742 - }, - { - "epoch": 2.8329233680227057, - "grad_norm": 1.982330560684204, - "learning_rate": 1.5756883536982296e-07, - "loss": 1.1126, - "step": 3743 - }, - { - "epoch": 2.8336802270577106, - "grad_norm": 1.9688644409179688, - "learning_rate": 1.5616142459217799e-07, - "loss": 1.1118, - "step": 3744 - }, - { - "epoch": 2.8344370860927155, - "grad_norm": 2.031545639038086, - "learning_rate": 1.5476035964734117e-07, - "loss": 1.061, - "step": 3745 - }, - { - "epoch": 2.83519394512772, - "grad_norm": 1.9219672679901123, - "learning_rate": 1.5336564143841856e-07, - "loss": 1.031, - "step": 3746 - }, - { - "epoch": 2.835950804162725, - "grad_norm": 2.1881892681121826, - "learning_rate": 1.5197727086442445e-07, - "loss": 1.0689, - "step": 3747 - }, - { - "epoch": 2.8367076631977293, - "grad_norm": 1.885879397392273, - "learning_rate": 1.505952488202789e-07, - "loss": 1.0866, - "step": 3748 - }, - { - "epoch": 2.837464522232734, - "grad_norm": 2.17256760597229, - "learning_rate": 1.492195761968146e-07, - "loss": 1.0774, - "step": 3749 - }, - { - "epoch": 2.838221381267739, - "grad_norm": 2.0141475200653076, - "learning_rate": 1.4785025388076906e-07, - "loss": 
1.1078, - "step": 3750 - }, - { - "epoch": 2.8389782403027435, - "grad_norm": 2.1120545864105225, - "learning_rate": 1.4648728275478566e-07, - "loss": 1.0698, - "step": 3751 - }, - { - "epoch": 2.8397350993377484, - "grad_norm": 1.9993555545806885, - "learning_rate": 1.451306636974159e-07, - "loss": 1.0529, - "step": 3752 - }, - { - "epoch": 2.840491958372753, - "grad_norm": 1.9042015075683594, - "learning_rate": 1.4378039758311616e-07, - "loss": 1.035, - "step": 3753 - }, - { - "epoch": 2.8412488174077577, - "grad_norm": 2.0726895332336426, - "learning_rate": 1.4243648528224414e-07, - "loss": 1.0772, - "step": 3754 - }, - { - "epoch": 2.8420056764427626, - "grad_norm": 2.004347085952759, - "learning_rate": 1.4109892766106804e-07, - "loss": 1.066, - "step": 3755 - }, - { - "epoch": 2.8427625354777675, - "grad_norm": 2.1998095512390137, - "learning_rate": 1.397677255817563e-07, - "loss": 1.1011, - "step": 3756 - }, - { - "epoch": 2.843519394512772, - "grad_norm": 1.9528348445892334, - "learning_rate": 1.3844287990238113e-07, - "loss": 1.105, - "step": 3757 - }, - { - "epoch": 2.844276253547777, - "grad_norm": 2.1867001056671143, - "learning_rate": 1.3712439147691946e-07, - "loss": 1.0787, - "step": 3758 - }, - { - "epoch": 2.8450331125827812, - "grad_norm": 2.0233795642852783, - "learning_rate": 1.3581226115524753e-07, - "loss": 1.0587, - "step": 3759 - }, - { - "epoch": 2.845789971617786, - "grad_norm": 1.9928818941116333, - "learning_rate": 1.345064897831441e-07, - "loss": 1.0421, - "step": 3760 - }, - { - "epoch": 2.846546830652791, - "grad_norm": 1.9571059942245483, - "learning_rate": 1.3320707820229063e-07, - "loss": 1.0569, - "step": 3761 - }, - { - "epoch": 2.847303689687796, - "grad_norm": 2.076955795288086, - "learning_rate": 1.3191402725026765e-07, - "loss": 1.0854, - "step": 3762 - }, - { - "epoch": 2.8480605487228003, - "grad_norm": 2.1233267784118652, - "learning_rate": 1.3062733776055504e-07, - "loss": 1.0457, - "step": 3763 - }, - { - "epoch": 2.8488174077578052, - "grad_norm": 1.9417656660079956, - "learning_rate": 1.2934701056253526e-07, - "loss": 1.0308, - "step": 3764 - }, - { - "epoch": 2.8495742667928097, - "grad_norm": 1.9117321968078613, - "learning_rate": 1.2807304648148552e-07, - "loss": 1.0519, - "step": 3765 - }, - { - "epoch": 2.8503311258278146, - "grad_norm": 1.987637996673584, - "learning_rate": 1.2680544633858457e-07, - "loss": 1.0602, - "step": 3766 - }, - { - "epoch": 2.8510879848628194, - "grad_norm": 2.072512626647949, - "learning_rate": 1.2554421095090923e-07, - "loss": 1.1063, - "step": 3767 - }, - { - "epoch": 2.851844843897824, - "grad_norm": 2.4176509380340576, - "learning_rate": 1.2428934113143005e-07, - "loss": 1.0889, - "step": 3768 - }, - { - "epoch": 2.8526017029328288, - "grad_norm": 2.25588059425354, - "learning_rate": 1.2304083768902016e-07, - "loss": 1.0894, - "step": 3769 - }, - { - "epoch": 2.853358561967833, - "grad_norm": 1.9961562156677246, - "learning_rate": 1.2179870142844305e-07, - "loss": 1.07, - "step": 3770 - }, - { - "epoch": 2.854115421002838, - "grad_norm": 2.0790538787841797, - "learning_rate": 1.2056293315036139e-07, - "loss": 1.1308, - "step": 3771 - }, - { - "epoch": 2.854872280037843, - "grad_norm": 2.107841968536377, - "learning_rate": 1.1933353365133393e-07, - "loss": 1.1053, - "step": 3772 - }, - { - "epoch": 2.855629139072848, - "grad_norm": 1.9669723510742188, - "learning_rate": 1.1811050372381292e-07, - "loss": 1.1049, - "step": 3773 - }, - { - "epoch": 2.8563859981078523, - "grad_norm": 1.9607486724853516, - 
"learning_rate": 1.1689384415614223e-07, - "loss": 1.0817, - "step": 3774 - }, - { - "epoch": 2.857142857142857, - "grad_norm": 1.8808550834655762, - "learning_rate": 1.1568355573256491e-07, - "loss": 1.0328, - "step": 3775 - }, - { - "epoch": 2.8578997161778616, - "grad_norm": 2.0765459537506104, - "learning_rate": 1.1447963923321327e-07, - "loss": 1.0449, - "step": 3776 - }, - { - "epoch": 2.8586565752128665, - "grad_norm": 1.9006658792495728, - "learning_rate": 1.1328209543411224e-07, - "loss": 1.0046, - "step": 3777 - }, - { - "epoch": 2.8594134342478714, - "grad_norm": 2.1098666191101074, - "learning_rate": 1.1209092510718261e-07, - "loss": 1.0837, - "step": 3778 - }, - { - "epoch": 2.8601702932828763, - "grad_norm": 2.088935375213623, - "learning_rate": 1.1090612902023337e-07, - "loss": 1.0702, - "step": 3779 - }, - { - "epoch": 2.8609271523178808, - "grad_norm": 2.2443082332611084, - "learning_rate": 1.0972770793696717e-07, - "loss": 1.0864, - "step": 3780 - }, - { - "epoch": 2.8616840113528856, - "grad_norm": 2.380600929260254, - "learning_rate": 1.0855566261697372e-07, - "loss": 1.073, - "step": 3781 - }, - { - "epoch": 2.86244087038789, - "grad_norm": 2.0619399547576904, - "learning_rate": 1.073899938157375e-07, - "loss": 1.0486, - "step": 3782 - }, - { - "epoch": 2.863197729422895, - "grad_norm": 1.9430749416351318, - "learning_rate": 1.0623070228463008e-07, - "loss": 1.0664, - "step": 3783 - }, - { - "epoch": 2.8639545884579, - "grad_norm": 1.9037846326828003, - "learning_rate": 1.0507778877091445e-07, - "loss": 1.041, - "step": 3784 - }, - { - "epoch": 2.8647114474929043, - "grad_norm": 1.8566458225250244, - "learning_rate": 1.0393125401773843e-07, - "loss": 1.066, - "step": 3785 - }, - { - "epoch": 2.865468306527909, - "grad_norm": 1.8834096193313599, - "learning_rate": 1.027910987641447e-07, - "loss": 1.0902, - "step": 3786 - }, - { - "epoch": 2.866225165562914, - "grad_norm": 2.1945738792419434, - "learning_rate": 1.0165732374505733e-07, - "loss": 1.0731, - "step": 3787 - }, - { - "epoch": 2.8669820245979185, - "grad_norm": 2.057518720626831, - "learning_rate": 1.0052992969128971e-07, - "loss": 1.0807, - "step": 3788 - }, - { - "epoch": 2.8677388836329234, - "grad_norm": 1.897512435913086, - "learning_rate": 9.940891732954447e-08, - "loss": 1.0146, - "step": 3789 - }, - { - "epoch": 2.8684957426679283, - "grad_norm": 1.9884440898895264, - "learning_rate": 9.829428738240904e-08, - "loss": 1.0896, - "step": 3790 - }, - { - "epoch": 2.8692526017029327, - "grad_norm": 2.2165613174438477, - "learning_rate": 9.718604056835573e-08, - "loss": 1.014, - "step": 3791 - }, - { - "epoch": 2.8700094607379376, - "grad_norm": 2.517817258834839, - "learning_rate": 9.608417760174488e-08, - "loss": 1.0497, - "step": 3792 - }, - { - "epoch": 2.870766319772942, - "grad_norm": 1.8578647375106812, - "learning_rate": 9.498869919281952e-08, - "loss": 1.064, - "step": 3793 - }, - { - "epoch": 2.871523178807947, - "grad_norm": 2.168428897857666, - "learning_rate": 9.389960604770966e-08, - "loss": 1.1338, - "step": 3794 - }, - { - "epoch": 2.872280037842952, - "grad_norm": 1.902740716934204, - "learning_rate": 9.281689886842575e-08, - "loss": 1.0833, - "step": 3795 - }, - { - "epoch": 2.8730368968779567, - "grad_norm": 2.10799503326416, - "learning_rate": 9.174057835286632e-08, - "loss": 1.0498, - "step": 3796 - }, - { - "epoch": 2.873793755912961, - "grad_norm": 1.9497560262680054, - "learning_rate": 9.067064519481139e-08, - "loss": 1.0824, - "step": 3797 - }, - { - "epoch": 2.874550614947966, - 
"grad_norm": 2.052687406539917, - "learning_rate": 8.96071000839214e-08, - "loss": 1.0838, - "step": 3798 - }, - { - "epoch": 2.8753074739829705, - "grad_norm": 2.033168315887451, - "learning_rate": 8.854994370574378e-08, - "loss": 1.0696, - "step": 3799 - }, - { - "epoch": 2.8760643330179754, - "grad_norm": 1.928364872932434, - "learning_rate": 8.749917674170415e-08, - "loss": 1.0824, - "step": 3800 - }, - { - "epoch": 2.8768211920529803, - "grad_norm": 1.9337732791900635, - "learning_rate": 8.645479986911066e-08, - "loss": 1.117, - "step": 3801 - }, - { - "epoch": 2.8775780510879847, - "grad_norm": 1.929337501525879, - "learning_rate": 8.541681376115416e-08, - "loss": 1.0448, - "step": 3802 - }, - { - "epoch": 2.8783349101229896, - "grad_norm": 2.0957815647125244, - "learning_rate": 8.438521908690244e-08, - "loss": 1.0833, - "step": 3803 - }, - { - "epoch": 2.8790917691579945, - "grad_norm": 2.090304136276245, - "learning_rate": 8.336001651130706e-08, - "loss": 1.0567, - "step": 3804 - }, - { - "epoch": 2.879848628192999, - "grad_norm": 2.115295648574829, - "learning_rate": 8.234120669519771e-08, - "loss": 1.031, - "step": 3805 - }, - { - "epoch": 2.880605487228004, - "grad_norm": 1.9636808633804321, - "learning_rate": 8.132879029528445e-08, - "loss": 1.0494, - "step": 3806 - }, - { - "epoch": 2.8813623462630087, - "grad_norm": 1.9101999998092651, - "learning_rate": 8.03227679641533e-08, - "loss": 1.0571, - "step": 3807 - }, - { - "epoch": 2.882119205298013, - "grad_norm": 1.9551316499710083, - "learning_rate": 7.932314035027393e-08, - "loss": 1.0658, - "step": 3808 - }, - { - "epoch": 2.882876064333018, - "grad_norm": 2.1239876747131348, - "learning_rate": 7.832990809798869e-08, - "loss": 1.0788, - "step": 3809 - }, - { - "epoch": 2.8836329233680225, - "grad_norm": 1.9690558910369873, - "learning_rate": 7.734307184752134e-08, - "loss": 1.0772, - "step": 3810 - }, - { - "epoch": 2.8843897824030273, - "grad_norm": 2.071542263031006, - "learning_rate": 7.636263223496941e-08, - "loss": 1.0839, - "step": 3811 - }, - { - "epoch": 2.8851466414380322, - "grad_norm": 2.1702964305877686, - "learning_rate": 7.538858989231189e-08, - "loss": 1.0452, - "step": 3812 - }, - { - "epoch": 2.885903500473037, - "grad_norm": 2.1600115299224854, - "learning_rate": 7.442094544740037e-08, - "loss": 1.133, - "step": 3813 - }, - { - "epoch": 2.8866603595080416, - "grad_norm": 1.943969964981079, - "learning_rate": 7.34596995239646e-08, - "loss": 1.0342, - "step": 3814 - }, - { - "epoch": 2.8874172185430464, - "grad_norm": 2.029170513153076, - "learning_rate": 7.250485274160693e-08, - "loss": 1.0983, - "step": 3815 - }, - { - "epoch": 2.888174077578051, - "grad_norm": 2.1345629692077637, - "learning_rate": 7.1556405715809e-08, - "loss": 1.0854, - "step": 3816 - }, - { - "epoch": 2.8889309366130558, - "grad_norm": 1.8675469160079956, - "learning_rate": 7.061435905792389e-08, - "loss": 1.0661, - "step": 3817 - }, - { - "epoch": 2.8896877956480607, - "grad_norm": 2.0254111289978027, - "learning_rate": 6.967871337518176e-08, - "loss": 1.103, - "step": 3818 - }, - { - "epoch": 2.8904446546830656, - "grad_norm": 2.162344455718994, - "learning_rate": 6.874946927068538e-08, - "loss": 1.0878, - "step": 3819 - }, - { - "epoch": 2.89120151371807, - "grad_norm": 2.2124130725860596, - "learning_rate": 6.782662734341012e-08, - "loss": 1.0599, - "step": 3820 - }, - { - "epoch": 2.891958372753075, - "grad_norm": 2.1409800052642822, - "learning_rate": 6.691018818820837e-08, - "loss": 1.0525, - "step": 3821 - }, - { - "epoch": 
2.8927152317880793, - "grad_norm": 2.1800687313079834, - "learning_rate": 6.600015239579959e-08, - "loss": 1.0602, - "step": 3822 - }, - { - "epoch": 2.893472090823084, - "grad_norm": 2.0903069972991943, - "learning_rate": 6.50965205527814e-08, - "loss": 1.0851, - "step": 3823 - }, - { - "epoch": 2.894228949858089, - "grad_norm": 1.9317938089370728, - "learning_rate": 6.419929324162068e-08, - "loss": 1.0319, - "step": 3824 - }, - { - "epoch": 2.8949858088930935, - "grad_norm": 2.0327014923095703, - "learning_rate": 6.330847104065472e-08, - "loss": 1.1128, - "step": 3825 - }, - { - "epoch": 2.8957426679280984, - "grad_norm": 2.1695809364318848, - "learning_rate": 6.242405452409559e-08, - "loss": 1.0591, - "step": 3826 - }, - { - "epoch": 2.896499526963103, - "grad_norm": 2.077954053878784, - "learning_rate": 6.154604426202468e-08, - "loss": 1.0295, - "step": 3827 - }, - { - "epoch": 2.8972563859981078, - "grad_norm": 2.0263519287109375, - "learning_rate": 6.067444082039482e-08, - "loss": 1.0147, - "step": 3828 - }, - { - "epoch": 2.8980132450331126, - "grad_norm": 2.1431772708892822, - "learning_rate": 5.980924476102595e-08, - "loss": 1.0512, - "step": 3829 - }, - { - "epoch": 2.8987701040681175, - "grad_norm": 1.9561032056808472, - "learning_rate": 5.895045664161168e-08, - "loss": 1.0426, - "step": 3830 - }, - { - "epoch": 2.899526963103122, - "grad_norm": 2.133995532989502, - "learning_rate": 5.8098077015713814e-08, - "loss": 1.0365, - "step": 3831 - }, - { - "epoch": 2.900283822138127, - "grad_norm": 2.5247886180877686, - "learning_rate": 5.7252106432762304e-08, - "loss": 1.1153, - "step": 3832 - }, - { - "epoch": 2.9010406811731313, - "grad_norm": 1.9548890590667725, - "learning_rate": 5.6412545438057476e-08, - "loss": 1.0739, - "step": 3833 - }, - { - "epoch": 2.901797540208136, - "grad_norm": 1.98203444480896, - "learning_rate": 5.557939457276783e-08, - "loss": 1.0844, - "step": 3834 - }, - { - "epoch": 2.902554399243141, - "grad_norm": 2.1283376216888428, - "learning_rate": 5.475265437393116e-08, - "loss": 1.0675, - "step": 3835 - }, - { - "epoch": 2.903311258278146, - "grad_norm": 1.917360782623291, - "learning_rate": 5.393232537444783e-08, - "loss": 1.0464, - "step": 3836 - }, - { - "epoch": 2.9040681173131504, - "grad_norm": 1.9345555305480957, - "learning_rate": 5.3118408103091954e-08, - "loss": 1.0937, - "step": 3837 - }, - { - "epoch": 2.9048249763481553, - "grad_norm": 2.1080758571624756, - "learning_rate": 5.2310903084502445e-08, - "loss": 1.0984, - "step": 3838 - }, - { - "epoch": 2.9055818353831597, - "grad_norm": 2.0316121578216553, - "learning_rate": 5.150981083918309e-08, - "loss": 1.105, - "step": 3839 - }, - { - "epoch": 2.9063386944181646, - "grad_norm": 2.1214966773986816, - "learning_rate": 5.0715131883506914e-08, - "loss": 1.0481, - "step": 3840 - }, - { - "epoch": 2.9070955534531695, - "grad_norm": 2.003058433532715, - "learning_rate": 4.99268667297129e-08, - "loss": 1.0848, - "step": 3841 - }, - { - "epoch": 2.907852412488174, - "grad_norm": 2.0405402183532715, - "learning_rate": 4.9145015885902656e-08, - "loss": 1.1065, - "step": 3842 - }, - { - "epoch": 2.908609271523179, - "grad_norm": 2.1864330768585205, - "learning_rate": 4.836957985604592e-08, - "loss": 1.1217, - "step": 3843 - }, - { - "epoch": 2.9093661305581833, - "grad_norm": 1.8275071382522583, - "learning_rate": 4.7600559139976164e-08, - "loss": 1.0634, - "step": 3844 - }, - { - "epoch": 2.910122989593188, - "grad_norm": 2.006591320037842, - "learning_rate": 4.683795423339395e-08, - "loss": 1.0702, - 
"step": 3845 - }, - { - "epoch": 2.910879848628193, - "grad_norm": 2.4923205375671387, - "learning_rate": 4.608176562786352e-08, - "loss": 1.0633, - "step": 3846 - }, - { - "epoch": 2.911636707663198, - "grad_norm": 1.8209044933319092, - "learning_rate": 4.533199381080951e-08, - "loss": 1.0856, - "step": 3847 - }, - { - "epoch": 2.9123935666982024, - "grad_norm": 2.0942399501800537, - "learning_rate": 4.458863926552586e-08, - "loss": 1.0936, - "step": 3848 - }, - { - "epoch": 2.9131504257332073, - "grad_norm": 2.149657964706421, - "learning_rate": 4.385170247116687e-08, - "loss": 1.0472, - "step": 3849 - }, - { - "epoch": 2.9139072847682117, - "grad_norm": 1.905176043510437, - "learning_rate": 4.3121183902750584e-08, - "loss": 1.0524, - "step": 3850 - }, - { - "epoch": 2.9146641438032166, - "grad_norm": 2.1164419651031494, - "learning_rate": 4.2397084031158755e-08, - "loss": 1.0406, - "step": 3851 - }, - { - "epoch": 2.9154210028382215, - "grad_norm": 1.955041527748108, - "learning_rate": 4.1679403323133525e-08, - "loss": 1.0788, - "step": 3852 - }, - { - "epoch": 2.9161778618732264, - "grad_norm": 2.0062403678894043, - "learning_rate": 4.096814224128301e-08, - "loss": 1.1013, - "step": 3853 - }, - { - "epoch": 2.916934720908231, - "grad_norm": 2.1404199600219727, - "learning_rate": 4.0263301244073465e-08, - "loss": 1.1179, - "step": 3854 - }, - { - "epoch": 2.9176915799432357, - "grad_norm": 1.968444585800171, - "learning_rate": 3.9564880785834875e-08, - "loss": 1.0376, - "step": 3855 - }, - { - "epoch": 2.91844843897824, - "grad_norm": 1.9066696166992188, - "learning_rate": 3.887288131676096e-08, - "loss": 1.0348, - "step": 3856 - }, - { - "epoch": 2.919205298013245, - "grad_norm": 2.04758620262146, - "learning_rate": 3.818730328290026e-08, - "loss": 1.0696, - "step": 3857 - }, - { - "epoch": 2.91996215704825, - "grad_norm": 1.8712373971939087, - "learning_rate": 3.750814712616839e-08, - "loss": 1.0559, - "step": 3858 - }, - { - "epoch": 2.9207190160832543, - "grad_norm": 1.9164494276046753, - "learning_rate": 3.6835413284338016e-08, - "loss": 1.0717, - "step": 3859 - }, - { - "epoch": 2.9214758751182592, - "grad_norm": 1.9720449447631836, - "learning_rate": 3.616910219104442e-08, - "loss": 1.1104, - "step": 3860 - }, - { - "epoch": 2.9222327341532637, - "grad_norm": 2.1905975341796875, - "learning_rate": 3.5509214275779944e-08, - "loss": 1.1058, - "step": 3861 - }, - { - "epoch": 2.9229895931882686, - "grad_norm": 1.912367582321167, - "learning_rate": 3.4855749963898434e-08, - "loss": 1.0694, - "step": 3862 - }, - { - "epoch": 2.9237464522232735, - "grad_norm": 2.054760456085205, - "learning_rate": 3.420870967661412e-08, - "loss": 1.0661, - "step": 3863 - }, - { - "epoch": 2.9245033112582783, - "grad_norm": 2.100724220275879, - "learning_rate": 3.3568093830998316e-08, - "loss": 1.0685, - "step": 3864 - }, - { - "epoch": 2.925260170293283, - "grad_norm": 1.9942377805709839, - "learning_rate": 3.2933902839982706e-08, - "loss": 1.0362, - "step": 3865 - }, - { - "epoch": 2.9260170293282877, - "grad_norm": 1.9845491647720337, - "learning_rate": 3.230613711235715e-08, - "loss": 1.0948, - "step": 3866 - }, - { - "epoch": 2.926773888363292, - "grad_norm": 1.9502067565917969, - "learning_rate": 3.168479705276969e-08, - "loss": 1.0292, - "step": 3867 - }, - { - "epoch": 2.927530747398297, - "grad_norm": 2.2125320434570312, - "learning_rate": 3.106988306172764e-08, - "loss": 1.0965, - "step": 3868 - }, - { - "epoch": 2.928287606433302, - "grad_norm": 2.0215206146240234, - "learning_rate": 
3.046139553559317e-08, - "loss": 1.0451, - "step": 3869 - }, - { - "epoch": 2.9290444654683068, - "grad_norm": 2.161459445953369, - "learning_rate": 2.985933486658992e-08, - "loss": 1.0826, - "step": 3870 - }, - { - "epoch": 2.929801324503311, - "grad_norm": 2.112816572189331, - "learning_rate": 2.926370144279531e-08, - "loss": 1.1162, - "step": 3871 - }, - { - "epoch": 2.930558183538316, - "grad_norm": 2.043856382369995, - "learning_rate": 2.8674495648147115e-08, - "loss": 1.0377, - "step": 3872 - }, - { - "epoch": 2.9313150425733205, - "grad_norm": 2.230227470397949, - "learning_rate": 2.809171786243685e-08, - "loss": 1.0775, - "step": 3873 - }, - { - "epoch": 2.9320719016083254, - "grad_norm": 1.8375619649887085, - "learning_rate": 2.7515368461316434e-08, - "loss": 1.0217, - "step": 3874 - }, - { - "epoch": 2.9328287606433303, - "grad_norm": 1.9141755104064941, - "learning_rate": 2.694544781629039e-08, - "loss": 1.038, - "step": 3875 - }, - { - "epoch": 2.9335856196783348, - "grad_norm": 2.1602697372436523, - "learning_rate": 2.6381956294720323e-08, - "loss": 1.0714, - "step": 3876 - }, - { - "epoch": 2.9343424787133396, - "grad_norm": 1.923949956893921, - "learning_rate": 2.5824894259825987e-08, - "loss": 1.0561, - "step": 3877 - }, - { - "epoch": 2.9350993377483445, - "grad_norm": 2.1230146884918213, - "learning_rate": 2.5274262070678672e-08, - "loss": 1.0689, - "step": 3878 - }, - { - "epoch": 2.935856196783349, - "grad_norm": 2.0640814304351807, - "learning_rate": 2.4730060082210033e-08, - "loss": 1.0851, - "step": 3879 - }, - { - "epoch": 2.936613055818354, - "grad_norm": 1.8468024730682373, - "learning_rate": 2.4192288645203268e-08, - "loss": 1.0588, - "step": 3880 - }, - { - "epoch": 2.9373699148533587, - "grad_norm": 2.06715726852417, - "learning_rate": 2.3660948106297502e-08, - "loss": 1.04, - "step": 3881 - }, - { - "epoch": 2.938126773888363, - "grad_norm": 2.129422426223755, - "learning_rate": 2.313603880798671e-08, - "loss": 1.0935, - "step": 3882 - }, - { - "epoch": 2.938883632923368, - "grad_norm": 1.9994871616363525, - "learning_rate": 2.2617561088619707e-08, - "loss": 1.0823, - "step": 3883 - }, - { - "epoch": 2.9396404919583725, - "grad_norm": 1.9387072324752808, - "learning_rate": 2.2105515282399045e-08, - "loss": 1.0506, - "step": 3884 - }, - { - "epoch": 2.9403973509933774, - "grad_norm": 1.9773590564727783, - "learning_rate": 2.1599901719382117e-08, - "loss": 1.0713, - "step": 3885 - }, - { - "epoch": 2.9411542100283823, - "grad_norm": 1.9165699481964111, - "learning_rate": 2.110072072547893e-08, - "loss": 1.0889, - "step": 3886 - }, - { - "epoch": 2.941911069063387, - "grad_norm": 1.9767038822174072, - "learning_rate": 2.060797262245434e-08, - "loss": 1.1121, - "step": 3887 - }, - { - "epoch": 2.9426679280983916, - "grad_norm": 1.9442821741104126, - "learning_rate": 2.012165772792693e-08, - "loss": 1.0852, - "step": 3888 - }, - { - "epoch": 2.9434247871333965, - "grad_norm": 1.9988024234771729, - "learning_rate": 1.96417763553668e-08, - "loss": 1.0733, - "step": 3889 - }, - { - "epoch": 2.944181646168401, - "grad_norm": 2.0853540897369385, - "learning_rate": 1.91683288141e-08, - "loss": 1.0869, - "step": 3890 - }, - { - "epoch": 2.944938505203406, - "grad_norm": 1.8882020711898804, - "learning_rate": 1.8701315409300757e-08, - "loss": 1.0716, - "step": 3891 - }, - { - "epoch": 2.9456953642384107, - "grad_norm": 1.9533286094665527, - "learning_rate": 1.8240736442000363e-08, - "loss": 1.0977, - "step": 3892 - }, - { - "epoch": 2.946452223273415, - "grad_norm": 
2.2869935035705566, - "learning_rate": 1.7786592209081624e-08, - "loss": 1.036, - "step": 3893 - }, - { - "epoch": 2.94720908230842, - "grad_norm": 1.8621643781661987, - "learning_rate": 1.733888300327774e-08, - "loss": 1.0655, - "step": 3894 - }, - { - "epoch": 2.947965941343425, - "grad_norm": 2.069187641143799, - "learning_rate": 1.689760911317565e-08, - "loss": 1.0701, - "step": 3895 - }, - { - "epoch": 2.9487228003784294, - "grad_norm": 2.112271547317505, - "learning_rate": 1.64627708232138e-08, - "loss": 1.0697, - "step": 3896 - }, - { - "epoch": 2.9494796594134343, - "grad_norm": 2.0788121223449707, - "learning_rate": 1.6034368413683266e-08, - "loss": 1.0746, - "step": 3897 - }, - { - "epoch": 2.950236518448439, - "grad_norm": 2.0578696727752686, - "learning_rate": 1.56124021607244e-08, - "loss": 1.0702, - "step": 3898 - }, - { - "epoch": 2.9509933774834436, - "grad_norm": 2.171917676925659, - "learning_rate": 1.519687233633019e-08, - "loss": 1.0755, - "step": 3899 - }, - { - "epoch": 2.9517502365184485, - "grad_norm": 1.9235490560531616, - "learning_rate": 1.4787779208345125e-08, - "loss": 1.0209, - "step": 3900 - }, - { - "epoch": 2.952507095553453, - "grad_norm": 2.046241283416748, - "learning_rate": 1.4385123040465213e-08, - "loss": 1.0691, - "step": 3901 - }, - { - "epoch": 2.953263954588458, - "grad_norm": 2.0865299701690674, - "learning_rate": 1.398890409223575e-08, - "loss": 1.0399, - "step": 3902 - }, - { - "epoch": 2.9540208136234627, - "grad_norm": 2.0731747150421143, - "learning_rate": 1.3599122619053542e-08, - "loss": 1.0499, - "step": 3903 - }, - { - "epoch": 2.9547776726584676, - "grad_norm": 2.198157548904419, - "learning_rate": 1.32157788721658e-08, - "loss": 1.0247, - "step": 3904 - }, - { - "epoch": 2.955534531693472, - "grad_norm": 2.0734500885009766, - "learning_rate": 1.2838873098669024e-08, - "loss": 1.0417, - "step": 3905 - }, - { - "epoch": 2.956291390728477, - "grad_norm": 1.8891007900238037, - "learning_rate": 1.2468405541513447e-08, - "loss": 1.0541, - "step": 3906 - }, - { - "epoch": 2.9570482497634814, - "grad_norm": 1.9352359771728516, - "learning_rate": 1.210437643949415e-08, - "loss": 1.0932, - "step": 3907 - }, - { - "epoch": 2.9578051087984862, - "grad_norm": 1.9413546323776245, - "learning_rate": 1.1746786027259944e-08, - "loss": 1.082, - "step": 3908 - }, - { - "epoch": 2.958561967833491, - "grad_norm": 2.091618299484253, - "learning_rate": 1.1395634535308943e-08, - "loss": 1.0216, - "step": 3909 - }, - { - "epoch": 2.959318826868496, - "grad_norm": 2.132253408432007, - "learning_rate": 1.1050922189986316e-08, - "loss": 1.0842, - "step": 3910 - }, - { - "epoch": 2.9600756859035005, - "grad_norm": 2.1053178310394287, - "learning_rate": 1.0712649213489865e-08, - "loss": 1.0316, - "step": 3911 - }, - { - "epoch": 2.9608325449385053, - "grad_norm": 2.1205570697784424, - "learning_rate": 1.0380815823864458e-08, - "loss": 1.065, - "step": 3912 - }, - { - "epoch": 2.96158940397351, - "grad_norm": 2.1178319454193115, - "learning_rate": 1.0055422235004254e-08, - "loss": 1.0666, - "step": 3913 - }, - { - "epoch": 2.9623462630085147, - "grad_norm": 2.021894693374634, - "learning_rate": 9.736468656653818e-09, - "loss": 1.0567, - "step": 3914 - }, - { - "epoch": 2.9631031220435196, - "grad_norm": 1.921276569366455, - "learning_rate": 9.423955294405891e-09, - "loss": 1.093, - "step": 3915 - }, - { - "epoch": 2.963859981078524, - "grad_norm": 2.062957286834717, - "learning_rate": 9.117882349702507e-09, - "loss": 1.0449, - "step": 3916 - }, - { - "epoch": 
2.964616840113529, - "grad_norm": 2.258112668991089, - "learning_rate": 8.818250019831662e-09, - "loss": 1.0587, - "step": 3917 - }, - { - "epoch": 2.9653736991485333, - "grad_norm": 1.8993000984191895, - "learning_rate": 8.52505849793286e-09, - "loss": 1.0402, - "step": 3918 - }, - { - "epoch": 2.966130558183538, - "grad_norm": 1.8386201858520508, - "learning_rate": 8.23830797299268e-09, - "loss": 1.1062, - "step": 3919 - }, - { - "epoch": 2.966887417218543, - "grad_norm": 2.060410976409912, - "learning_rate": 7.957998629846991e-09, - "loss": 1.066, - "step": 3920 - }, - { - "epoch": 2.967644276253548, - "grad_norm": 2.098123073577881, - "learning_rate": 7.684130649177623e-09, - "loss": 1.0881, - "step": 3921 - }, - { - "epoch": 2.9684011352885524, - "grad_norm": 2.2169816493988037, - "learning_rate": 7.416704207515695e-09, - "loss": 1.0661, - "step": 3922 - }, - { - "epoch": 2.9691579943235573, - "grad_norm": 2.0316176414489746, - "learning_rate": 7.155719477241619e-09, - "loss": 1.0737, - "step": 3923 - }, - { - "epoch": 2.9699148533585618, - "grad_norm": 1.8836135864257812, - "learning_rate": 6.901176626581769e-09, - "loss": 1.0435, - "step": 3924 - }, - { - "epoch": 2.9706717123935666, - "grad_norm": 2.030869960784912, - "learning_rate": 6.653075819609588e-09, - "loss": 1.0358, - "step": 3925 - }, - { - "epoch": 2.9714285714285715, - "grad_norm": 2.1759679317474365, - "learning_rate": 6.411417216247812e-09, - "loss": 1.066, - "step": 3926 - }, - { - "epoch": 2.9721854304635764, - "grad_norm": 2.092773199081421, - "learning_rate": 6.176200972265136e-09, - "loss": 1.031, - "step": 3927 - }, - { - "epoch": 2.972942289498581, - "grad_norm": 2.3539814949035645, - "learning_rate": 5.947427239279547e-09, - "loss": 1.1136, - "step": 3928 - }, - { - "epoch": 2.9736991485335857, - "grad_norm": 2.3484017848968506, - "learning_rate": 5.725096164753884e-09, - "loss": 1.0145, - "step": 3929 - }, - { - "epoch": 2.97445600756859, - "grad_norm": 1.9310166835784912, - "learning_rate": 5.509207892001385e-09, - "loss": 1.0231, - "step": 3930 - }, - { - "epoch": 2.975212866603595, - "grad_norm": 1.9161075353622437, - "learning_rate": 5.299762560177922e-09, - "loss": 1.0041, - "step": 3931 - }, - { - "epoch": 2.9759697256386, - "grad_norm": 2.0112030506134033, - "learning_rate": 5.096760304289763e-09, - "loss": 1.1227, - "step": 3932 - }, - { - "epoch": 2.9767265846736044, - "grad_norm": 2.0244789123535156, - "learning_rate": 4.900201255189143e-09, - "loss": 1.0922, - "step": 3933 - }, - { - "epoch": 2.9774834437086093, - "grad_norm": 2.514658212661743, - "learning_rate": 4.710085539575363e-09, - "loss": 1.054, - "step": 3934 - }, - { - "epoch": 2.9782403027436137, - "grad_norm": 1.9655340909957886, - "learning_rate": 4.526413279993689e-09, - "loss": 1.0453, - "step": 3935 - }, - { - "epoch": 2.9789971617786186, - "grad_norm": 1.988950490951538, - "learning_rate": 4.349184594836453e-09, - "loss": 1.112, - "step": 3936 - }, - { - "epoch": 2.9797540208136235, - "grad_norm": 2.0003416538238525, - "learning_rate": 4.178399598341953e-09, - "loss": 1.0888, - "step": 3937 - }, - { - "epoch": 2.9805108798486284, - "grad_norm": 1.8892840147018433, - "learning_rate": 4.014058400597776e-09, - "loss": 1.0316, - "step": 3938 - }, - { - "epoch": 2.981267738883633, - "grad_norm": 1.8778574466705322, - "learning_rate": 3.856161107533029e-09, - "loss": 1.0264, - "step": 3939 - }, - { - "epoch": 2.9820245979186377, - "grad_norm": 1.9889436960220337, - "learning_rate": 3.70470782092722e-09, - "loss": 1.0775, - "step": 3940 - 
}, - { - "epoch": 2.982781456953642, - "grad_norm": 1.9453630447387695, - "learning_rate": 3.55969863840471e-09, - "loss": 1.0661, - "step": 3941 - }, - { - "epoch": 2.983538315988647, - "grad_norm": 2.1750118732452393, - "learning_rate": 3.421133653436929e-09, - "loss": 1.0582, - "step": 3942 - }, - { - "epoch": 2.984295175023652, - "grad_norm": 1.9517415761947632, - "learning_rate": 3.289012955339048e-09, - "loss": 1.061, - "step": 3943 - }, - { - "epoch": 2.985052034058657, - "grad_norm": 1.9353458881378174, - "learning_rate": 3.16333662927553e-09, - "loss": 1.0533, - "step": 3944 - }, - { - "epoch": 2.9858088930936613, - "grad_norm": 2.1572377681732178, - "learning_rate": 3.044104756254578e-09, - "loss": 1.0871, - "step": 3945 - }, - { - "epoch": 2.986565752128666, - "grad_norm": 1.9636902809143066, - "learning_rate": 2.9313174131325764e-09, - "loss": 1.0751, - "step": 3946 - }, - { - "epoch": 2.9873226111636706, - "grad_norm": 1.9330499172210693, - "learning_rate": 2.8249746726085392e-09, - "loss": 1.0858, - "step": 3947 - }, - { - "epoch": 2.9880794701986755, - "grad_norm": 1.8204699754714966, - "learning_rate": 2.7250766032307735e-09, - "loss": 1.034, - "step": 3948 - }, - { - "epoch": 2.9888363292336804, - "grad_norm": 1.9643014669418335, - "learning_rate": 2.6316232693913253e-09, - "loss": 1.062, - "step": 3949 - }, - { - "epoch": 2.989593188268685, - "grad_norm": 2.25469708442688, - "learning_rate": 2.544614731329312e-09, - "loss": 1.0581, - "step": 3950 - }, - { - "epoch": 2.9903500473036897, - "grad_norm": 2.0587730407714844, - "learning_rate": 2.464051045128703e-09, - "loss": 1.0528, - "step": 3951 - }, - { - "epoch": 2.9911069063386946, - "grad_norm": 1.9544748067855835, - "learning_rate": 2.389932262720538e-09, - "loss": 1.0655, - "step": 3952 - }, - { - "epoch": 2.991863765373699, - "grad_norm": 2.1084601879119873, - "learning_rate": 2.3222584318784854e-09, - "loss": 1.1346, - "step": 3953 - }, - { - "epoch": 2.992620624408704, - "grad_norm": 2.051255226135254, - "learning_rate": 2.261029596226618e-09, - "loss": 1.1014, - "step": 3954 - }, - { - "epoch": 2.993377483443709, - "grad_norm": 2.079298734664917, - "learning_rate": 2.206245795231637e-09, - "loss": 1.092, - "step": 3955 - }, - { - "epoch": 2.9941343424787132, - "grad_norm": 1.9083516597747803, - "learning_rate": 2.157907064203985e-09, - "loss": 1.0385, - "step": 3956 - }, - { - "epoch": 2.994891201513718, - "grad_norm": 2.315964460372925, - "learning_rate": 2.1160134343056167e-09, - "loss": 1.0794, - "step": 3957 - }, - { - "epoch": 2.9956480605487226, - "grad_norm": 2.072871685028076, - "learning_rate": 2.080564932537786e-09, - "loss": 1.0603, - "step": 3958 - }, - { - "epoch": 2.9964049195837275, - "grad_norm": 1.996877908706665, - "learning_rate": 2.0515615817510374e-09, - "loss": 1.066, - "step": 3959 - }, - { - "epoch": 2.9971617786187323, - "grad_norm": 2.1000583171844482, - "learning_rate": 2.0290034006407686e-09, - "loss": 1.0554, - "step": 3960 - }, - { - "epoch": 2.9979186376537372, - "grad_norm": 2.0836434364318848, - "learning_rate": 2.0128904037472256e-09, - "loss": 1.0769, - "step": 3961 - }, - { - "epoch": 2.9986754966887417, - "grad_norm": 1.999711275100708, - "learning_rate": 2.0032226014555062e-09, - "loss": 1.0731, - "step": 3962 - }, - { - "epoch": 2.9994323557237466, - "grad_norm": 2.6111867427825928, - "learning_rate": 2e-09, - "loss": 1.1002, - "step": 3963 - } - ], - "logging_steps": 1.0, - "max_steps": 3963, - "num_input_tokens_seen": 0, - "num_train_epochs": 3, - "save_steps": 500, - 
"stateful_callbacks": { - "TrainerControl": { - "args": { - "should_epoch_stop": false, - "should_evaluate": false, - "should_log": false, - "should_save": true, - "should_training_stop": true - }, - "attributes": {} - } - }, - "total_flos": 5.729010317829918e+19, - "train_batch_size": 2, - "trial_name": null, - "trial_params": null -}