diff --git "a/checkpoint-5500/trainer_state.json" "b/checkpoint-5500/trainer_state.json" new file mode 100644--- /dev/null +++ "b/checkpoint-5500/trainer_state.json" @@ -0,0 +1,33016 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 4.4, + "global_step": 5500, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 0.0001, + "loss": 1.9315, + "step": 1 + }, + { + "epoch": 0.0, + "learning_rate": 0.0002, + "loss": 1.8391, + "step": 2 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019996798975672214, + "loss": 1.8441, + "step": 3 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019993597951344433, + "loss": 1.7656, + "step": 4 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019990396927016646, + "loss": 1.6324, + "step": 5 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019987195902688862, + "loss": 1.677, + "step": 6 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019983994878361075, + "loss": 1.4647, + "step": 7 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019980793854033291, + "loss": 1.4285, + "step": 8 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019977592829705507, + "loss": 1.3052, + "step": 9 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001997439180537772, + "loss": 1.2885, + "step": 10 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019971190781049937, + "loss": 1.1584, + "step": 11 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019967989756722153, + "loss": 1.106, + "step": 12 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019964788732394366, + "loss": 1.2272, + "step": 13 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019961587708066582, + "loss": 1.1007, + "step": 14 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019958386683738798, + "loss": 0.9575, + "step": 15 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019955185659411014, + "loss": 1.1255, + "step": 16 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019951984635083227, + "loss": 0.9909, + "step": 17 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019948783610755443, + "loss": 1.0567, + "step": 18 + }, + { + "epoch": 0.02, + "learning_rate": 0.0001994558258642766, + "loss": 1.1133, + "step": 19 + }, + { + "epoch": 0.02, + "learning_rate": 0.00019942381562099872, + "loss": 1.0576, + "step": 20 + }, + { + "epoch": 0.02, + "learning_rate": 0.00019939180537772088, + "loss": 1.0808, + "step": 21 + }, + { + "epoch": 0.02, + "learning_rate": 0.00019935979513444302, + "loss": 0.9458, + "step": 22 + }, + { + "epoch": 0.02, + "learning_rate": 0.0001993277848911652, + "loss": 1.0027, + "step": 23 + }, + { + "epoch": 0.02, + "learning_rate": 0.00019929577464788734, + "loss": 0.9723, + "step": 24 + }, + { + "epoch": 0.02, + "learning_rate": 0.00019926376440460947, + "loss": 0.9045, + "step": 25 + }, + { + "epoch": 0.02, + "learning_rate": 0.00019923175416133163, + "loss": 0.8118, + "step": 26 + }, + { + "epoch": 0.02, + "learning_rate": 0.0001991997439180538, + "loss": 0.9211, + "step": 27 + }, + { + "epoch": 0.02, + "learning_rate": 0.00019916773367477595, + "loss": 0.9402, + "step": 28 + }, + { + "epoch": 0.02, + "learning_rate": 0.00019913572343149808, + "loss": 1.0336, + "step": 29 + }, + { + "epoch": 0.02, + "learning_rate": 0.00019910371318822024, + "loss": 0.9593, + "step": 30 + }, + { + "epoch": 0.02, + "learning_rate": 0.0001990717029449424, + "loss": 0.9217, + "step": 31 + }, + { + "epoch": 0.03, + "learning_rate": 0.00019903969270166453, + "loss": 0.914, + "step": 32 + }, + { + "epoch": 0.03, + "learning_rate": 0.0001990076824583867, + "loss": 0.8461, + "step": 33 + }, + { + "epoch": 0.03, + "learning_rate": 0.00019897567221510885, + "loss": 0.9383, + "step": 34 + }, + { + "epoch": 0.03, + "learning_rate": 0.00019894366197183098, + "loss": 1.0103, + "step": 35 + }, + { + "epoch": 0.03, + "learning_rate": 0.00019891165172855314, + "loss": 0.7654, + "step": 36 + }, + { + "epoch": 0.03, + "learning_rate": 0.0001988796414852753, + "loss": 0.8492, + "step": 37 + }, + { + "epoch": 0.03, + "learning_rate": 0.00019884763124199746, + "loss": 0.9894, + "step": 38 + }, + { + "epoch": 0.03, + "learning_rate": 0.0001988156209987196, + "loss": 0.94, + "step": 39 + }, + { + "epoch": 0.03, + "learning_rate": 0.00019878361075544173, + "loss": 0.913, + "step": 40 + }, + { + "epoch": 0.03, + "learning_rate": 0.00019875160051216392, + "loss": 1.0549, + "step": 41 + }, + { + "epoch": 0.03, + "learning_rate": 0.00019871959026888605, + "loss": 0.8724, + "step": 42 + }, + { + "epoch": 0.03, + "learning_rate": 0.0001986875800256082, + "loss": 0.8876, + "step": 43 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019865556978233034, + "loss": 1.013, + "step": 44 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019862355953905253, + "loss": 0.9322, + "step": 45 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019859154929577466, + "loss": 0.8533, + "step": 46 + }, + { + "epoch": 0.04, + "learning_rate": 0.0001985595390524968, + "loss": 0.8774, + "step": 47 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019852752880921895, + "loss": 0.8907, + "step": 48 + }, + { + "epoch": 0.04, + "learning_rate": 0.0001984955185659411, + "loss": 0.914, + "step": 49 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019846350832266327, + "loss": 1.0708, + "step": 50 + }, + { + "epoch": 0.04, + "learning_rate": 0.0001984314980793854, + "loss": 0.8649, + "step": 51 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019839948783610757, + "loss": 0.9685, + "step": 52 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019836747759282972, + "loss": 0.8426, + "step": 53 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019833546734955186, + "loss": 0.8327, + "step": 54 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019830345710627402, + "loss": 0.8601, + "step": 55 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019827144686299618, + "loss": 0.98, + "step": 56 + }, + { + "epoch": 0.05, + "learning_rate": 0.0001982394366197183, + "loss": 0.8878, + "step": 57 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019820742637644047, + "loss": 0.8317, + "step": 58 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019817541613316263, + "loss": 0.8962, + "step": 59 + }, + { + "epoch": 0.05, + "learning_rate": 0.0001981434058898848, + "loss": 0.8735, + "step": 60 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019811139564660692, + "loss": 0.8498, + "step": 61 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019807938540332905, + "loss": 0.8897, + "step": 62 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019804737516005124, + "loss": 0.8903, + "step": 63 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019801536491677337, + "loss": 0.7908, + "step": 64 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019798335467349553, + "loss": 0.8899, + "step": 65 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019795134443021767, + "loss": 0.7803, + "step": 66 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019791933418693983, + "loss": 0.9622, + "step": 67 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019788732394366199, + "loss": 0.8514, + "step": 68 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019785531370038412, + "loss": 0.8658, + "step": 69 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019782330345710628, + "loss": 0.892, + "step": 70 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019779129321382844, + "loss": 0.7928, + "step": 71 + }, + { + "epoch": 0.06, + "learning_rate": 0.0001977592829705506, + "loss": 0.9122, + "step": 72 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019772727272727273, + "loss": 0.8282, + "step": 73 + }, + { + "epoch": 0.06, + "learning_rate": 0.0001976952624839949, + "loss": 0.9106, + "step": 74 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019766325224071705, + "loss": 0.9137, + "step": 75 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019763124199743918, + "loss": 0.8609, + "step": 76 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019759923175416134, + "loss": 0.8198, + "step": 77 + }, + { + "epoch": 0.06, + "learning_rate": 0.0001975672215108835, + "loss": 0.805, + "step": 78 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019753521126760564, + "loss": 0.8454, + "step": 79 + }, + { + "epoch": 0.06, + "learning_rate": 0.0001975032010243278, + "loss": 0.8345, + "step": 80 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019747119078104995, + "loss": 0.9347, + "step": 81 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019743918053777211, + "loss": 0.9628, + "step": 82 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019740717029449425, + "loss": 0.8413, + "step": 83 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019737516005121638, + "loss": 0.8286, + "step": 84 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019734314980793857, + "loss": 0.8401, + "step": 85 + }, + { + "epoch": 0.07, + "learning_rate": 0.0001973111395646607, + "loss": 0.8555, + "step": 86 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019727912932138286, + "loss": 0.8608, + "step": 87 + }, + { + "epoch": 0.07, + "learning_rate": 0.000197247119078105, + "loss": 0.8512, + "step": 88 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019721510883482715, + "loss": 0.8502, + "step": 89 + }, + { + "epoch": 0.07, + "learning_rate": 0.0001971830985915493, + "loss": 0.883, + "step": 90 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019715108834827144, + "loss": 0.8998, + "step": 91 + }, + { + "epoch": 0.07, + "learning_rate": 0.0001971190781049936, + "loss": 0.8364, + "step": 92 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019708706786171576, + "loss": 0.9417, + "step": 93 + }, + { + "epoch": 0.08, + "learning_rate": 0.0001970550576184379, + "loss": 0.7837, + "step": 94 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019702304737516006, + "loss": 0.8604, + "step": 95 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019699103713188222, + "loss": 0.8501, + "step": 96 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019695902688860438, + "loss": 0.9183, + "step": 97 + }, + { + "epoch": 0.08, + "learning_rate": 0.0001969270166453265, + "loss": 0.9338, + "step": 98 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019689500640204864, + "loss": 0.8868, + "step": 99 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019686299615877083, + "loss": 0.7937, + "step": 100 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019683098591549296, + "loss": 0.9089, + "step": 101 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019679897567221512, + "loss": 0.9192, + "step": 102 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019676696542893725, + "loss": 0.6853, + "step": 103 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019673495518565944, + "loss": 0.8323, + "step": 104 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019670294494238157, + "loss": 1.0199, + "step": 105 + }, + { + "epoch": 0.08, + "learning_rate": 0.0001966709346991037, + "loss": 0.8575, + "step": 106 + }, + { + "epoch": 0.09, + "learning_rate": 0.0001966389244558259, + "loss": 0.9167, + "step": 107 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019660691421254803, + "loss": 0.8873, + "step": 108 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019657490396927018, + "loss": 0.8711, + "step": 109 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019654289372599232, + "loss": 0.8671, + "step": 110 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019651088348271448, + "loss": 0.7837, + "step": 111 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019647887323943664, + "loss": 0.9155, + "step": 112 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019644686299615877, + "loss": 0.885, + "step": 113 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019641485275288093, + "loss": 0.768, + "step": 114 + }, + { + "epoch": 0.09, + "learning_rate": 0.0001963828425096031, + "loss": 0.9048, + "step": 115 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019635083226632522, + "loss": 0.8682, + "step": 116 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019631882202304738, + "loss": 0.8946, + "step": 117 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019628681177976954, + "loss": 0.9001, + "step": 118 + }, + { + "epoch": 0.1, + "learning_rate": 0.0001962548015364917, + "loss": 0.8769, + "step": 119 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019622279129321383, + "loss": 0.8878, + "step": 120 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019619078104993597, + "loss": 0.8588, + "step": 121 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019615877080665815, + "loss": 0.8742, + "step": 122 + }, + { + "epoch": 0.1, + "learning_rate": 0.0001961267605633803, + "loss": 0.8681, + "step": 123 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019609475032010245, + "loss": 0.7442, + "step": 124 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019606274007682458, + "loss": 0.8789, + "step": 125 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019603072983354674, + "loss": 0.907, + "step": 126 + }, + { + "epoch": 0.1, + "learning_rate": 0.0001959987195902689, + "loss": 0.8865, + "step": 127 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019596670934699103, + "loss": 0.7627, + "step": 128 + }, + { + "epoch": 0.1, + "learning_rate": 0.0001959346991037132, + "loss": 0.9144, + "step": 129 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019590268886043535, + "loss": 0.7996, + "step": 130 + }, + { + "epoch": 0.1, + "learning_rate": 0.0001958706786171575, + "loss": 0.8552, + "step": 131 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019583866837387964, + "loss": 0.8462, + "step": 132 + }, + { + "epoch": 0.11, + "learning_rate": 0.0001958066581306018, + "loss": 0.9224, + "step": 133 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019577464788732396, + "loss": 0.9863, + "step": 134 + }, + { + "epoch": 0.11, + "learning_rate": 0.0001957426376440461, + "loss": 0.8801, + "step": 135 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019571062740076826, + "loss": 0.8503, + "step": 136 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019567861715749042, + "loss": 0.7993, + "step": 137 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019564660691421255, + "loss": 1.0245, + "step": 138 + }, + { + "epoch": 0.11, + "learning_rate": 0.0001956145966709347, + "loss": 0.8884, + "step": 139 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019558258642765687, + "loss": 0.8398, + "step": 140 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019555057618437903, + "loss": 0.947, + "step": 141 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019551856594110116, + "loss": 0.9127, + "step": 142 + }, + { + "epoch": 0.11, + "learning_rate": 0.0001954865556978233, + "loss": 0.8593, + "step": 143 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019545454545454548, + "loss": 0.922, + "step": 144 + }, + { + "epoch": 0.12, + "learning_rate": 0.0001954225352112676, + "loss": 0.8535, + "step": 145 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019539052496798977, + "loss": 0.9153, + "step": 146 + }, + { + "epoch": 0.12, + "learning_rate": 0.0001953585147247119, + "loss": 0.898, + "step": 147 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019532650448143406, + "loss": 0.7648, + "step": 148 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019529449423815622, + "loss": 0.8843, + "step": 149 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019526248399487836, + "loss": 0.8734, + "step": 150 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019523047375160052, + "loss": 0.8772, + "step": 151 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019519846350832268, + "loss": 0.7813, + "step": 152 + }, + { + "epoch": 0.12, + "learning_rate": 0.0001951664532650448, + "loss": 0.8675, + "step": 153 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019513444302176697, + "loss": 0.8901, + "step": 154 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019510243277848913, + "loss": 0.7662, + "step": 155 + }, + { + "epoch": 0.12, + "learning_rate": 0.0001950704225352113, + "loss": 0.8703, + "step": 156 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019503841229193342, + "loss": 0.8963, + "step": 157 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019500640204865558, + "loss": 0.8979, + "step": 158 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019497439180537774, + "loss": 1.0489, + "step": 159 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019494238156209987, + "loss": 0.8099, + "step": 160 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019491037131882203, + "loss": 0.9223, + "step": 161 + }, + { + "epoch": 0.13, + "learning_rate": 0.0001948783610755442, + "loss": 0.8511, + "step": 162 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019484635083226635, + "loss": 0.9503, + "step": 163 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019481434058898849, + "loss": 0.8375, + "step": 164 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019478233034571062, + "loss": 0.8652, + "step": 165 + }, + { + "epoch": 0.13, + "learning_rate": 0.0001947503201024328, + "loss": 0.7749, + "step": 166 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019471830985915494, + "loss": 0.7934, + "step": 167 + }, + { + "epoch": 0.13, + "learning_rate": 0.0001946862996158771, + "loss": 0.8054, + "step": 168 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019465428937259923, + "loss": 0.8301, + "step": 169 + }, + { + "epoch": 0.14, + "learning_rate": 0.0001946222791293214, + "loss": 0.8996, + "step": 170 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019459026888604355, + "loss": 0.9442, + "step": 171 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019455825864276568, + "loss": 0.8854, + "step": 172 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019452624839948784, + "loss": 0.7999, + "step": 173 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019449423815621, + "loss": 0.8808, + "step": 174 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019446222791293213, + "loss": 0.7705, + "step": 175 + }, + { + "epoch": 0.14, + "learning_rate": 0.0001944302176696543, + "loss": 0.8195, + "step": 176 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019439820742637645, + "loss": 0.8216, + "step": 177 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019436619718309861, + "loss": 0.9044, + "step": 178 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019433418693982075, + "loss": 0.9466, + "step": 179 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019430217669654288, + "loss": 0.8257, + "step": 180 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019427016645326507, + "loss": 0.8719, + "step": 181 + }, + { + "epoch": 0.15, + "learning_rate": 0.0001942381562099872, + "loss": 0.8018, + "step": 182 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019420614596670936, + "loss": 0.9924, + "step": 183 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019417413572343152, + "loss": 0.8662, + "step": 184 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019414212548015368, + "loss": 0.8489, + "step": 185 + }, + { + "epoch": 0.15, + "learning_rate": 0.0001941101152368758, + "loss": 0.8398, + "step": 186 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019407810499359794, + "loss": 0.8627, + "step": 187 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019404609475032013, + "loss": 0.8292, + "step": 188 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019401408450704226, + "loss": 0.8722, + "step": 189 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019398207426376442, + "loss": 0.9201, + "step": 190 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019395006402048656, + "loss": 0.9509, + "step": 191 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019391805377720872, + "loss": 0.8291, + "step": 192 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019388604353393088, + "loss": 0.757, + "step": 193 + }, + { + "epoch": 0.16, + "learning_rate": 0.000193854033290653, + "loss": 0.8797, + "step": 194 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019382202304737517, + "loss": 0.8004, + "step": 195 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019379001280409733, + "loss": 0.7925, + "step": 196 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019375800256081946, + "loss": 0.8024, + "step": 197 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019372599231754162, + "loss": 0.8551, + "step": 198 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019369398207426378, + "loss": 0.741, + "step": 199 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019366197183098594, + "loss": 0.7821, + "step": 200 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019362996158770807, + "loss": 0.7872, + "step": 201 + }, + { + "epoch": 0.16, + "learning_rate": 0.0001935979513444302, + "loss": 0.7361, + "step": 202 + }, + { + "epoch": 0.16, + "learning_rate": 0.0001935659411011524, + "loss": 0.9066, + "step": 203 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019353393085787452, + "loss": 0.9637, + "step": 204 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019350192061459668, + "loss": 0.7598, + "step": 205 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019346991037131882, + "loss": 0.9266, + "step": 206 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019343790012804098, + "loss": 0.9029, + "step": 207 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019340588988476314, + "loss": 0.8283, + "step": 208 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019337387964148527, + "loss": 1.0154, + "step": 209 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019334186939820746, + "loss": 0.8852, + "step": 210 + }, + { + "epoch": 0.17, + "learning_rate": 0.0001933098591549296, + "loss": 0.8763, + "step": 211 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019327784891165175, + "loss": 0.8474, + "step": 212 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019324583866837388, + "loss": 0.8772, + "step": 213 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019321382842509604, + "loss": 0.7933, + "step": 214 + }, + { + "epoch": 0.17, + "learning_rate": 0.0001931818181818182, + "loss": 0.8402, + "step": 215 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019314980793854033, + "loss": 0.8051, + "step": 216 + }, + { + "epoch": 0.17, + "learning_rate": 0.0001931177976952625, + "loss": 0.806, + "step": 217 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019308578745198465, + "loss": 0.7911, + "step": 218 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019305377720870679, + "loss": 0.8357, + "step": 219 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019302176696542895, + "loss": 0.8854, + "step": 220 + }, + { + "epoch": 0.18, + "learning_rate": 0.0001929897567221511, + "loss": 0.9124, + "step": 221 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019295774647887326, + "loss": 0.7755, + "step": 222 + }, + { + "epoch": 0.18, + "learning_rate": 0.0001929257362355954, + "loss": 0.8193, + "step": 223 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019289372599231753, + "loss": 0.8625, + "step": 224 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019286171574903972, + "loss": 0.8731, + "step": 225 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019282970550576185, + "loss": 0.8643, + "step": 226 + }, + { + "epoch": 0.18, + "learning_rate": 0.000192797695262484, + "loss": 0.7401, + "step": 227 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019276568501920614, + "loss": 0.7438, + "step": 228 + }, + { + "epoch": 0.18, + "learning_rate": 0.0001927336747759283, + "loss": 0.7978, + "step": 229 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019270166453265046, + "loss": 0.8409, + "step": 230 + }, + { + "epoch": 0.18, + "learning_rate": 0.0001926696542893726, + "loss": 0.99, + "step": 231 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019263764404609475, + "loss": 0.8783, + "step": 232 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019260563380281691, + "loss": 0.9061, + "step": 233 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019257362355953905, + "loss": 0.7678, + "step": 234 + }, + { + "epoch": 0.19, + "learning_rate": 0.0001925416133162612, + "loss": 0.8586, + "step": 235 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019250960307298337, + "loss": 0.864, + "step": 236 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019247759282970553, + "loss": 0.7929, + "step": 237 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019244558258642766, + "loss": 0.8414, + "step": 238 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019241357234314982, + "loss": 0.7847, + "step": 239 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019238156209987198, + "loss": 0.8775, + "step": 240 + }, + { + "epoch": 0.19, + "learning_rate": 0.0001923495518565941, + "loss": 0.9297, + "step": 241 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019231754161331627, + "loss": 0.7949, + "step": 242 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019228553137003843, + "loss": 0.7288, + "step": 243 + }, + { + "epoch": 0.2, + "learning_rate": 0.0001922535211267606, + "loss": 0.701, + "step": 244 + }, + { + "epoch": 0.2, + "learning_rate": 0.00019222151088348272, + "loss": 0.8869, + "step": 245 + }, + { + "epoch": 0.2, + "learning_rate": 0.00019218950064020486, + "loss": 0.7423, + "step": 246 + }, + { + "epoch": 0.2, + "learning_rate": 0.00019215749039692704, + "loss": 0.8798, + "step": 247 + }, + { + "epoch": 0.2, + "learning_rate": 0.00019212548015364918, + "loss": 0.8075, + "step": 248 + }, + { + "epoch": 0.2, + "learning_rate": 0.00019209346991037134, + "loss": 0.8108, + "step": 249 + }, + { + "epoch": 0.2, + "learning_rate": 0.00019206145966709347, + "loss": 0.7848, + "step": 250 + }, + { + "epoch": 0.2, + "learning_rate": 0.00019202944942381563, + "loss": 0.9251, + "step": 251 + }, + { + "epoch": 0.2, + "learning_rate": 0.0001919974391805378, + "loss": 0.8289, + "step": 252 + }, + { + "epoch": 0.2, + "learning_rate": 0.00019196542893725992, + "loss": 0.7946, + "step": 253 + }, + { + "epoch": 0.2, + "learning_rate": 0.00019193341869398208, + "loss": 0.8757, + "step": 254 + }, + { + "epoch": 0.2, + "learning_rate": 0.00019190140845070424, + "loss": 0.9176, + "step": 255 + }, + { + "epoch": 0.2, + "learning_rate": 0.00019186939820742637, + "loss": 0.7873, + "step": 256 + }, + { + "epoch": 0.21, + "learning_rate": 0.00019183738796414853, + "loss": 0.8681, + "step": 257 + }, + { + "epoch": 0.21, + "learning_rate": 0.0001918053777208707, + "loss": 0.7723, + "step": 258 + }, + { + "epoch": 0.21, + "learning_rate": 0.00019177336747759285, + "loss": 0.9087, + "step": 259 + }, + { + "epoch": 0.21, + "learning_rate": 0.00019174135723431498, + "loss": 0.8274, + "step": 260 + }, + { + "epoch": 0.21, + "learning_rate": 0.00019170934699103714, + "loss": 0.7061, + "step": 261 + }, + { + "epoch": 0.21, + "learning_rate": 0.0001916773367477593, + "loss": 0.8029, + "step": 262 + }, + { + "epoch": 0.21, + "learning_rate": 0.00019164532650448144, + "loss": 0.9644, + "step": 263 + }, + { + "epoch": 0.21, + "learning_rate": 0.0001916133162612036, + "loss": 0.8109, + "step": 264 + }, + { + "epoch": 0.21, + "learning_rate": 0.00019158130601792576, + "loss": 0.7946, + "step": 265 + }, + { + "epoch": 0.21, + "learning_rate": 0.0001915492957746479, + "loss": 0.9144, + "step": 266 + }, + { + "epoch": 0.21, + "learning_rate": 0.00019151728553137005, + "loss": 0.8488, + "step": 267 + }, + { + "epoch": 0.21, + "learning_rate": 0.00019148527528809218, + "loss": 0.9137, + "step": 268 + }, + { + "epoch": 0.22, + "learning_rate": 0.00019145326504481437, + "loss": 0.8399, + "step": 269 + }, + { + "epoch": 0.22, + "learning_rate": 0.0001914212548015365, + "loss": 0.936, + "step": 270 + }, + { + "epoch": 0.22, + "learning_rate": 0.00019138924455825866, + "loss": 0.7787, + "step": 271 + }, + { + "epoch": 0.22, + "learning_rate": 0.0001913572343149808, + "loss": 0.7859, + "step": 272 + }, + { + "epoch": 0.22, + "learning_rate": 0.00019132522407170295, + "loss": 0.7855, + "step": 273 + }, + { + "epoch": 0.22, + "learning_rate": 0.0001912932138284251, + "loss": 0.7884, + "step": 274 + }, + { + "epoch": 0.22, + "learning_rate": 0.00019126120358514725, + "loss": 0.8351, + "step": 275 + }, + { + "epoch": 0.22, + "learning_rate": 0.0001912291933418694, + "loss": 0.81, + "step": 276 + }, + { + "epoch": 0.22, + "learning_rate": 0.00019119718309859157, + "loss": 0.8702, + "step": 277 + }, + { + "epoch": 0.22, + "learning_rate": 0.0001911651728553137, + "loss": 0.8879, + "step": 278 + }, + { + "epoch": 0.22, + "learning_rate": 0.00019113316261203586, + "loss": 0.8077, + "step": 279 + }, + { + "epoch": 0.22, + "learning_rate": 0.00019110115236875802, + "loss": 0.7394, + "step": 280 + }, + { + "epoch": 0.22, + "learning_rate": 0.00019106914212548018, + "loss": 0.8772, + "step": 281 + }, + { + "epoch": 0.23, + "learning_rate": 0.0001910371318822023, + "loss": 0.9009, + "step": 282 + }, + { + "epoch": 0.23, + "learning_rate": 0.00019100512163892444, + "loss": 0.8463, + "step": 283 + }, + { + "epoch": 0.23, + "learning_rate": 0.00019097311139564663, + "loss": 0.7462, + "step": 284 + }, + { + "epoch": 0.23, + "learning_rate": 0.00019094110115236876, + "loss": 0.7717, + "step": 285 + }, + { + "epoch": 0.23, + "learning_rate": 0.00019090909090909092, + "loss": 0.9058, + "step": 286 + }, + { + "epoch": 0.23, + "learning_rate": 0.00019087708066581308, + "loss": 0.899, + "step": 287 + }, + { + "epoch": 0.23, + "learning_rate": 0.00019084507042253521, + "loss": 0.8285, + "step": 288 + }, + { + "epoch": 0.23, + "learning_rate": 0.00019081306017925737, + "loss": 0.8832, + "step": 289 + }, + { + "epoch": 0.23, + "learning_rate": 0.0001907810499359795, + "loss": 0.835, + "step": 290 + }, + { + "epoch": 0.23, + "learning_rate": 0.0001907490396927017, + "loss": 0.8642, + "step": 291 + }, + { + "epoch": 0.23, + "learning_rate": 0.00019071702944942383, + "loss": 0.8272, + "step": 292 + }, + { + "epoch": 0.23, + "learning_rate": 0.00019068501920614596, + "loss": 0.7325, + "step": 293 + }, + { + "epoch": 0.24, + "learning_rate": 0.00019065300896286812, + "loss": 0.7843, + "step": 294 + }, + { + "epoch": 0.24, + "learning_rate": 0.00019062099871959028, + "loss": 0.8673, + "step": 295 + }, + { + "epoch": 0.24, + "learning_rate": 0.00019058898847631244, + "loss": 0.7734, + "step": 296 + }, + { + "epoch": 0.24, + "learning_rate": 0.00019055697823303457, + "loss": 0.9477, + "step": 297 + }, + { + "epoch": 0.24, + "learning_rate": 0.00019052496798975673, + "loss": 0.9324, + "step": 298 + }, + { + "epoch": 0.24, + "learning_rate": 0.0001904929577464789, + "loss": 0.8609, + "step": 299 + }, + { + "epoch": 0.24, + "learning_rate": 0.00019046094750320102, + "loss": 0.8278, + "step": 300 + }, + { + "epoch": 0.24, + "learning_rate": 0.00019042893725992318, + "loss": 1.0022, + "step": 301 + }, + { + "epoch": 0.24, + "learning_rate": 0.00019039692701664534, + "loss": 0.8121, + "step": 302 + }, + { + "epoch": 0.24, + "learning_rate": 0.0001903649167733675, + "loss": 0.8454, + "step": 303 + }, + { + "epoch": 0.24, + "learning_rate": 0.00019033290653008964, + "loss": 0.8791, + "step": 304 + }, + { + "epoch": 0.24, + "learning_rate": 0.00019030089628681177, + "loss": 0.8723, + "step": 305 + }, + { + "epoch": 0.24, + "learning_rate": 0.00019026888604353396, + "loss": 0.8677, + "step": 306 + }, + { + "epoch": 0.25, + "learning_rate": 0.0001902368758002561, + "loss": 0.8207, + "step": 307 + }, + { + "epoch": 0.25, + "learning_rate": 0.00019020486555697825, + "loss": 0.8039, + "step": 308 + }, + { + "epoch": 0.25, + "learning_rate": 0.00019017285531370038, + "loss": 0.8103, + "step": 309 + }, + { + "epoch": 0.25, + "learning_rate": 0.00019014084507042254, + "loss": 0.9038, + "step": 310 + }, + { + "epoch": 0.25, + "learning_rate": 0.0001901088348271447, + "loss": 0.7618, + "step": 311 + }, + { + "epoch": 0.25, + "learning_rate": 0.00019007682458386683, + "loss": 0.8253, + "step": 312 + }, + { + "epoch": 0.25, + "learning_rate": 0.00019004481434058902, + "loss": 0.7338, + "step": 313 + }, + { + "epoch": 0.25, + "learning_rate": 0.00019001280409731115, + "loss": 0.8628, + "step": 314 + }, + { + "epoch": 0.25, + "learning_rate": 0.00018998079385403328, + "loss": 0.8617, + "step": 315 + }, + { + "epoch": 0.25, + "learning_rate": 0.00018994878361075544, + "loss": 0.888, + "step": 316 + }, + { + "epoch": 0.25, + "learning_rate": 0.0001899167733674776, + "loss": 0.823, + "step": 317 + }, + { + "epoch": 0.25, + "learning_rate": 0.00018988476312419976, + "loss": 0.9481, + "step": 318 + }, + { + "epoch": 0.26, + "learning_rate": 0.0001898527528809219, + "loss": 0.9014, + "step": 319 + }, + { + "epoch": 0.26, + "learning_rate": 0.00018982074263764406, + "loss": 0.9272, + "step": 320 + }, + { + "epoch": 0.26, + "learning_rate": 0.00018978873239436622, + "loss": 0.9695, + "step": 321 + }, + { + "epoch": 0.26, + "learning_rate": 0.00018975672215108835, + "loss": 0.8319, + "step": 322 + }, + { + "epoch": 0.26, + "learning_rate": 0.0001897247119078105, + "loss": 0.9359, + "step": 323 + }, + { + "epoch": 0.26, + "learning_rate": 0.00018969270166453267, + "loss": 0.7734, + "step": 324 + }, + { + "epoch": 0.26, + "learning_rate": 0.00018966069142125483, + "loss": 0.938, + "step": 325 + }, + { + "epoch": 0.26, + "learning_rate": 0.00018962868117797696, + "loss": 0.8277, + "step": 326 + }, + { + "epoch": 0.26, + "learning_rate": 0.0001895966709346991, + "loss": 0.8462, + "step": 327 + }, + { + "epoch": 0.26, + "learning_rate": 0.00018956466069142128, + "loss": 0.7983, + "step": 328 + }, + { + "epoch": 0.26, + "learning_rate": 0.0001895326504481434, + "loss": 0.9047, + "step": 329 + }, + { + "epoch": 0.26, + "learning_rate": 0.00018950064020486557, + "loss": 0.8525, + "step": 330 + }, + { + "epoch": 0.26, + "learning_rate": 0.0001894686299615877, + "loss": 0.8341, + "step": 331 + }, + { + "epoch": 0.27, + "learning_rate": 0.00018943661971830987, + "loss": 0.8353, + "step": 332 + }, + { + "epoch": 0.27, + "learning_rate": 0.00018940460947503203, + "loss": 0.7323, + "step": 333 + }, + { + "epoch": 0.27, + "learning_rate": 0.00018937259923175416, + "loss": 0.8815, + "step": 334 + }, + { + "epoch": 0.27, + "learning_rate": 0.00018934058898847632, + "loss": 0.8846, + "step": 335 + }, + { + "epoch": 0.27, + "learning_rate": 0.00018930857874519848, + "loss": 0.7966, + "step": 336 + }, + { + "epoch": 0.27, + "learning_rate": 0.0001892765685019206, + "loss": 0.8179, + "step": 337 + }, + { + "epoch": 0.27, + "learning_rate": 0.00018924455825864277, + "loss": 0.772, + "step": 338 + }, + { + "epoch": 0.27, + "learning_rate": 0.00018921254801536493, + "loss": 0.8358, + "step": 339 + }, + { + "epoch": 0.27, + "learning_rate": 0.0001891805377720871, + "loss": 0.8171, + "step": 340 + }, + { + "epoch": 0.27, + "learning_rate": 0.00018914852752880922, + "loss": 0.9321, + "step": 341 + }, + { + "epoch": 0.27, + "learning_rate": 0.00018911651728553138, + "loss": 0.8696, + "step": 342 + }, + { + "epoch": 0.27, + "learning_rate": 0.00018908450704225354, + "loss": 0.8824, + "step": 343 + }, + { + "epoch": 0.28, + "learning_rate": 0.00018905249679897567, + "loss": 0.8447, + "step": 344 + }, + { + "epoch": 0.28, + "learning_rate": 0.00018902048655569783, + "loss": 0.9029, + "step": 345 + }, + { + "epoch": 0.28, + "learning_rate": 0.00018898847631242, + "loss": 0.8869, + "step": 346 + }, + { + "epoch": 0.28, + "learning_rate": 0.00018895646606914213, + "loss": 0.8039, + "step": 347 + }, + { + "epoch": 0.28, + "learning_rate": 0.0001889244558258643, + "loss": 0.9036, + "step": 348 + }, + { + "epoch": 0.28, + "learning_rate": 0.00018889244558258642, + "loss": 0.8788, + "step": 349 + }, + { + "epoch": 0.28, + "learning_rate": 0.0001888604353393086, + "loss": 0.9308, + "step": 350 + }, + { + "epoch": 0.28, + "learning_rate": 0.00018882842509603074, + "loss": 0.9206, + "step": 351 + }, + { + "epoch": 0.28, + "learning_rate": 0.0001887964148527529, + "loss": 0.8557, + "step": 352 + }, + { + "epoch": 0.28, + "learning_rate": 0.00018876440460947503, + "loss": 0.7907, + "step": 353 + }, + { + "epoch": 0.28, + "learning_rate": 0.0001887323943661972, + "loss": 0.8699, + "step": 354 + }, + { + "epoch": 0.28, + "learning_rate": 0.00018870038412291935, + "loss": 0.8015, + "step": 355 + }, + { + "epoch": 0.28, + "learning_rate": 0.00018866837387964148, + "loss": 0.86, + "step": 356 + }, + { + "epoch": 0.29, + "learning_rate": 0.00018863636363636364, + "loss": 0.7817, + "step": 357 + }, + { + "epoch": 0.29, + "learning_rate": 0.0001886043533930858, + "loss": 0.7862, + "step": 358 + }, + { + "epoch": 0.29, + "learning_rate": 0.00018857234314980794, + "loss": 0.8325, + "step": 359 + }, + { + "epoch": 0.29, + "learning_rate": 0.0001885403329065301, + "loss": 0.8379, + "step": 360 + }, + { + "epoch": 0.29, + "learning_rate": 0.00018850832266325226, + "loss": 0.8594, + "step": 361 + }, + { + "epoch": 0.29, + "learning_rate": 0.00018847631241997442, + "loss": 0.6996, + "step": 362 + }, + { + "epoch": 0.29, + "learning_rate": 0.00018844430217669655, + "loss": 0.7794, + "step": 363 + }, + { + "epoch": 0.29, + "learning_rate": 0.0001884122919334187, + "loss": 0.6593, + "step": 364 + }, + { + "epoch": 0.29, + "learning_rate": 0.00018838028169014087, + "loss": 0.7969, + "step": 365 + }, + { + "epoch": 0.29, + "learning_rate": 0.000188348271446863, + "loss": 0.8293, + "step": 366 + }, + { + "epoch": 0.29, + "learning_rate": 0.00018831626120358516, + "loss": 0.9299, + "step": 367 + }, + { + "epoch": 0.29, + "learning_rate": 0.00018828425096030732, + "loss": 1.0683, + "step": 368 + }, + { + "epoch": 0.3, + "learning_rate": 0.00018825224071702945, + "loss": 0.7665, + "step": 369 + }, + { + "epoch": 0.3, + "learning_rate": 0.0001882202304737516, + "loss": 0.8487, + "step": 370 + }, + { + "epoch": 0.3, + "learning_rate": 0.00018818822023047374, + "loss": 0.8032, + "step": 371 + }, + { + "epoch": 0.3, + "learning_rate": 0.00018815620998719593, + "loss": 0.8072, + "step": 372 + }, + { + "epoch": 0.3, + "learning_rate": 0.00018812419974391806, + "loss": 0.8926, + "step": 373 + }, + { + "epoch": 0.3, + "learning_rate": 0.0001880921895006402, + "loss": 0.7657, + "step": 374 + }, + { + "epoch": 0.3, + "learning_rate": 0.00018806017925736236, + "loss": 0.7209, + "step": 375 + }, + { + "epoch": 0.3, + "learning_rate": 0.00018802816901408452, + "loss": 0.8408, + "step": 376 + }, + { + "epoch": 0.3, + "learning_rate": 0.00018799615877080668, + "loss": 0.8641, + "step": 377 + }, + { + "epoch": 0.3, + "learning_rate": 0.0001879641485275288, + "loss": 0.7724, + "step": 378 + }, + { + "epoch": 0.3, + "learning_rate": 0.00018793213828425097, + "loss": 0.9203, + "step": 379 + }, + { + "epoch": 0.3, + "learning_rate": 0.00018790012804097313, + "loss": 0.6768, + "step": 380 + }, + { + "epoch": 0.3, + "learning_rate": 0.00018786811779769526, + "loss": 0.9299, + "step": 381 + }, + { + "epoch": 0.31, + "learning_rate": 0.00018783610755441742, + "loss": 0.7752, + "step": 382 + }, + { + "epoch": 0.31, + "learning_rate": 0.00018780409731113958, + "loss": 0.8127, + "step": 383 + }, + { + "epoch": 0.31, + "learning_rate": 0.00018777208706786174, + "loss": 0.8628, + "step": 384 + }, + { + "epoch": 0.31, + "learning_rate": 0.00018774007682458387, + "loss": 0.9397, + "step": 385 + }, + { + "epoch": 0.31, + "learning_rate": 0.000187708066581306, + "loss": 0.7555, + "step": 386 + }, + { + "epoch": 0.31, + "learning_rate": 0.0001876760563380282, + "loss": 0.8191, + "step": 387 + }, + { + "epoch": 0.31, + "learning_rate": 0.00018764404609475033, + "loss": 0.8863, + "step": 388 + }, + { + "epoch": 0.31, + "learning_rate": 0.00018761203585147249, + "loss": 0.7836, + "step": 389 + }, + { + "epoch": 0.31, + "learning_rate": 0.00018758002560819465, + "loss": 0.9117, + "step": 390 + }, + { + "epoch": 0.31, + "learning_rate": 0.00018754801536491678, + "loss": 0.8898, + "step": 391 + }, + { + "epoch": 0.31, + "learning_rate": 0.00018751600512163894, + "loss": 0.8702, + "step": 392 + }, + { + "epoch": 0.31, + "learning_rate": 0.00018748399487836107, + "loss": 0.7765, + "step": 393 + }, + { + "epoch": 0.32, + "learning_rate": 0.00018745198463508326, + "loss": 0.8169, + "step": 394 + }, + { + "epoch": 0.32, + "learning_rate": 0.0001874199743918054, + "loss": 0.8381, + "step": 395 + }, + { + "epoch": 0.32, + "learning_rate": 0.00018738796414852752, + "loss": 0.7824, + "step": 396 + }, + { + "epoch": 0.32, + "learning_rate": 0.00018735595390524968, + "loss": 0.9157, + "step": 397 + }, + { + "epoch": 0.32, + "learning_rate": 0.00018732394366197184, + "loss": 0.9007, + "step": 398 + }, + { + "epoch": 0.32, + "learning_rate": 0.000187291933418694, + "loss": 0.8198, + "step": 399 + }, + { + "epoch": 0.32, + "learning_rate": 0.00018725992317541613, + "loss": 0.827, + "step": 400 + }, + { + "epoch": 0.32, + "learning_rate": 0.0001872279129321383, + "loss": 0.7988, + "step": 401 + }, + { + "epoch": 0.32, + "learning_rate": 0.00018719590268886045, + "loss": 0.8396, + "step": 402 + }, + { + "epoch": 0.32, + "learning_rate": 0.0001871638924455826, + "loss": 0.8873, + "step": 403 + }, + { + "epoch": 0.32, + "learning_rate": 0.00018713188220230475, + "loss": 0.878, + "step": 404 + }, + { + "epoch": 0.32, + "learning_rate": 0.0001870998719590269, + "loss": 0.8117, + "step": 405 + }, + { + "epoch": 0.32, + "learning_rate": 0.00018706786171574904, + "loss": 0.8518, + "step": 406 + }, + { + "epoch": 0.33, + "learning_rate": 0.0001870358514724712, + "loss": 0.7746, + "step": 407 + }, + { + "epoch": 0.33, + "learning_rate": 0.00018700384122919333, + "loss": 0.7945, + "step": 408 + }, + { + "epoch": 0.33, + "learning_rate": 0.00018697183098591552, + "loss": 0.935, + "step": 409 + }, + { + "epoch": 0.33, + "learning_rate": 0.00018693982074263765, + "loss": 0.8708, + "step": 410 + }, + { + "epoch": 0.33, + "learning_rate": 0.0001869078104993598, + "loss": 0.8271, + "step": 411 + }, + { + "epoch": 0.33, + "learning_rate": 0.00018687580025608194, + "loss": 0.899, + "step": 412 + }, + { + "epoch": 0.33, + "learning_rate": 0.0001868437900128041, + "loss": 0.8558, + "step": 413 + }, + { + "epoch": 0.33, + "learning_rate": 0.00018681177976952626, + "loss": 0.8582, + "step": 414 + }, + { + "epoch": 0.33, + "learning_rate": 0.0001867797695262484, + "loss": 0.8338, + "step": 415 + }, + { + "epoch": 0.33, + "learning_rate": 0.00018674775928297056, + "loss": 0.8864, + "step": 416 + }, + { + "epoch": 0.33, + "learning_rate": 0.00018671574903969272, + "loss": 0.7602, + "step": 417 + }, + { + "epoch": 0.33, + "learning_rate": 0.00018668373879641485, + "loss": 0.8053, + "step": 418 + }, + { + "epoch": 0.34, + "learning_rate": 0.000186651728553137, + "loss": 0.8215, + "step": 419 + }, + { + "epoch": 0.34, + "learning_rate": 0.00018661971830985917, + "loss": 0.9307, + "step": 420 + }, + { + "epoch": 0.34, + "learning_rate": 0.00018658770806658133, + "loss": 0.7872, + "step": 421 + }, + { + "epoch": 0.34, + "learning_rate": 0.00018655569782330346, + "loss": 0.9308, + "step": 422 + }, + { + "epoch": 0.34, + "learning_rate": 0.00018652368758002562, + "loss": 0.9111, + "step": 423 + }, + { + "epoch": 0.34, + "learning_rate": 0.00018649167733674778, + "loss": 0.8009, + "step": 424 + }, + { + "epoch": 0.34, + "learning_rate": 0.0001864596670934699, + "loss": 0.7599, + "step": 425 + }, + { + "epoch": 0.34, + "learning_rate": 0.00018642765685019207, + "loss": 0.8486, + "step": 426 + }, + { + "epoch": 0.34, + "learning_rate": 0.00018639564660691423, + "loss": 0.7983, + "step": 427 + }, + { + "epoch": 0.34, + "learning_rate": 0.00018636363636363636, + "loss": 0.8217, + "step": 428 + }, + { + "epoch": 0.34, + "learning_rate": 0.00018633162612035852, + "loss": 0.826, + "step": 429 + }, + { + "epoch": 0.34, + "learning_rate": 0.00018629961587708066, + "loss": 0.8371, + "step": 430 + }, + { + "epoch": 0.34, + "learning_rate": 0.00018626760563380284, + "loss": 0.9477, + "step": 431 + }, + { + "epoch": 0.35, + "learning_rate": 0.00018623559539052498, + "loss": 0.9015, + "step": 432 + }, + { + "epoch": 0.35, + "learning_rate": 0.0001862035851472471, + "loss": 0.9626, + "step": 433 + }, + { + "epoch": 0.35, + "learning_rate": 0.00018617157490396927, + "loss": 0.9214, + "step": 434 + }, + { + "epoch": 0.35, + "learning_rate": 0.00018613956466069143, + "loss": 0.8551, + "step": 435 + }, + { + "epoch": 0.35, + "learning_rate": 0.0001861075544174136, + "loss": 0.8984, + "step": 436 + }, + { + "epoch": 0.35, + "learning_rate": 0.00018607554417413572, + "loss": 0.9231, + "step": 437 + }, + { + "epoch": 0.35, + "learning_rate": 0.00018604353393085788, + "loss": 0.889, + "step": 438 + }, + { + "epoch": 0.35, + "learning_rate": 0.00018601152368758004, + "loss": 0.8963, + "step": 439 + }, + { + "epoch": 0.35, + "learning_rate": 0.00018597951344430217, + "loss": 0.9209, + "step": 440 + }, + { + "epoch": 0.35, + "learning_rate": 0.00018594750320102433, + "loss": 0.7419, + "step": 441 + }, + { + "epoch": 0.35, + "learning_rate": 0.0001859154929577465, + "loss": 0.8324, + "step": 442 + }, + { + "epoch": 0.35, + "learning_rate": 0.00018588348271446865, + "loss": 0.747, + "step": 443 + }, + { + "epoch": 0.36, + "learning_rate": 0.00018585147247119079, + "loss": 0.7793, + "step": 444 + }, + { + "epoch": 0.36, + "learning_rate": 0.00018581946222791295, + "loss": 0.7732, + "step": 445 + }, + { + "epoch": 0.36, + "learning_rate": 0.0001857874519846351, + "loss": 0.7829, + "step": 446 + }, + { + "epoch": 0.36, + "learning_rate": 0.00018575544174135724, + "loss": 0.7964, + "step": 447 + }, + { + "epoch": 0.36, + "learning_rate": 0.0001857234314980794, + "loss": 0.8326, + "step": 448 + }, + { + "epoch": 0.36, + "learning_rate": 0.00018569142125480156, + "loss": 0.7514, + "step": 449 + }, + { + "epoch": 0.36, + "learning_rate": 0.0001856594110115237, + "loss": 0.8751, + "step": 450 + }, + { + "epoch": 0.36, + "learning_rate": 0.00018562740076824585, + "loss": 0.9423, + "step": 451 + }, + { + "epoch": 0.36, + "learning_rate": 0.00018559539052496798, + "loss": 0.8569, + "step": 452 + }, + { + "epoch": 0.36, + "learning_rate": 0.00018556338028169017, + "loss": 0.8859, + "step": 453 + }, + { + "epoch": 0.36, + "learning_rate": 0.0001855313700384123, + "loss": 0.909, + "step": 454 + }, + { + "epoch": 0.36, + "learning_rate": 0.00018549935979513443, + "loss": 0.8874, + "step": 455 + }, + { + "epoch": 0.36, + "learning_rate": 0.0001854673495518566, + "loss": 0.8224, + "step": 456 + }, + { + "epoch": 0.37, + "learning_rate": 0.00018543533930857875, + "loss": 0.9106, + "step": 457 + }, + { + "epoch": 0.37, + "learning_rate": 0.00018540332906530091, + "loss": 0.8802, + "step": 458 + }, + { + "epoch": 0.37, + "learning_rate": 0.00018537131882202305, + "loss": 0.7875, + "step": 459 + }, + { + "epoch": 0.37, + "learning_rate": 0.0001853393085787452, + "loss": 0.9491, + "step": 460 + }, + { + "epoch": 0.37, + "learning_rate": 0.00018530729833546737, + "loss": 0.8323, + "step": 461 + }, + { + "epoch": 0.37, + "learning_rate": 0.0001852752880921895, + "loss": 0.6475, + "step": 462 + }, + { + "epoch": 0.37, + "learning_rate": 0.00018524327784891166, + "loss": 0.8548, + "step": 463 + }, + { + "epoch": 0.37, + "learning_rate": 0.00018521126760563382, + "loss": 0.8599, + "step": 464 + }, + { + "epoch": 0.37, + "learning_rate": 0.00018517925736235598, + "loss": 0.6655, + "step": 465 + }, + { + "epoch": 0.37, + "learning_rate": 0.0001851472471190781, + "loss": 0.8753, + "step": 466 + }, + { + "epoch": 0.37, + "learning_rate": 0.00018511523687580027, + "loss": 0.9359, + "step": 467 + }, + { + "epoch": 0.37, + "learning_rate": 0.00018508322663252243, + "loss": 0.8909, + "step": 468 + }, + { + "epoch": 0.38, + "learning_rate": 0.00018505121638924456, + "loss": 0.7802, + "step": 469 + }, + { + "epoch": 0.38, + "learning_rate": 0.00018501920614596672, + "loss": 0.937, + "step": 470 + }, + { + "epoch": 0.38, + "learning_rate": 0.00018498719590268888, + "loss": 0.8469, + "step": 471 + }, + { + "epoch": 0.38, + "learning_rate": 0.00018495518565941102, + "loss": 0.7917, + "step": 472 + }, + { + "epoch": 0.38, + "learning_rate": 0.00018492317541613318, + "loss": 0.7483, + "step": 473 + }, + { + "epoch": 0.38, + "learning_rate": 0.0001848911651728553, + "loss": 0.7961, + "step": 474 + }, + { + "epoch": 0.38, + "learning_rate": 0.0001848591549295775, + "loss": 0.944, + "step": 475 + }, + { + "epoch": 0.38, + "learning_rate": 0.00018482714468629963, + "loss": 0.9443, + "step": 476 + }, + { + "epoch": 0.38, + "learning_rate": 0.00018479513444302176, + "loss": 0.9223, + "step": 477 + }, + { + "epoch": 0.38, + "learning_rate": 0.00018476312419974392, + "loss": 0.8869, + "step": 478 + }, + { + "epoch": 0.38, + "learning_rate": 0.00018473111395646608, + "loss": 0.696, + "step": 479 + }, + { + "epoch": 0.38, + "learning_rate": 0.00018469910371318824, + "loss": 0.8594, + "step": 480 + }, + { + "epoch": 0.38, + "learning_rate": 0.00018466709346991037, + "loss": 0.7863, + "step": 481 + }, + { + "epoch": 0.39, + "learning_rate": 0.00018463508322663253, + "loss": 0.8353, + "step": 482 + }, + { + "epoch": 0.39, + "learning_rate": 0.0001846030729833547, + "loss": 0.7911, + "step": 483 + }, + { + "epoch": 0.39, + "learning_rate": 0.00018457106274007682, + "loss": 0.8551, + "step": 484 + }, + { + "epoch": 0.39, + "learning_rate": 0.00018453905249679898, + "loss": 0.9459, + "step": 485 + }, + { + "epoch": 0.39, + "learning_rate": 0.00018450704225352114, + "loss": 0.8402, + "step": 486 + }, + { + "epoch": 0.39, + "learning_rate": 0.00018447503201024328, + "loss": 0.9455, + "step": 487 + }, + { + "epoch": 0.39, + "learning_rate": 0.00018444302176696544, + "loss": 0.9049, + "step": 488 + }, + { + "epoch": 0.39, + "learning_rate": 0.00018441101152368757, + "loss": 0.868, + "step": 489 + }, + { + "epoch": 0.39, + "learning_rate": 0.00018437900128040976, + "loss": 0.8347, + "step": 490 + }, + { + "epoch": 0.39, + "learning_rate": 0.0001843469910371319, + "loss": 0.7836, + "step": 491 + }, + { + "epoch": 0.39, + "learning_rate": 0.00018431498079385405, + "loss": 0.741, + "step": 492 + }, + { + "epoch": 0.39, + "learning_rate": 0.0001842829705505762, + "loss": 0.7387, + "step": 493 + }, + { + "epoch": 0.4, + "learning_rate": 0.00018425096030729834, + "loss": 0.7173, + "step": 494 + }, + { + "epoch": 0.4, + "learning_rate": 0.0001842189500640205, + "loss": 0.7886, + "step": 495 + }, + { + "epoch": 0.4, + "learning_rate": 0.00018418693982074263, + "loss": 0.9398, + "step": 496 + }, + { + "epoch": 0.4, + "learning_rate": 0.00018415492957746482, + "loss": 0.8796, + "step": 497 + }, + { + "epoch": 0.4, + "learning_rate": 0.00018412291933418695, + "loss": 0.987, + "step": 498 + }, + { + "epoch": 0.4, + "learning_rate": 0.00018409090909090909, + "loss": 0.8179, + "step": 499 + }, + { + "epoch": 0.4, + "learning_rate": 0.00018405889884763125, + "loss": 0.9324, + "step": 500 + }, + { + "epoch": 0.4, + "learning_rate": 0.0001840268886043534, + "loss": 0.7424, + "step": 501 + }, + { + "epoch": 0.4, + "learning_rate": 0.00018399487836107557, + "loss": 0.7985, + "step": 502 + }, + { + "epoch": 0.4, + "learning_rate": 0.0001839628681177977, + "loss": 0.7645, + "step": 503 + }, + { + "epoch": 0.4, + "learning_rate": 0.00018393085787451986, + "loss": 0.8283, + "step": 504 + }, + { + "epoch": 0.4, + "learning_rate": 0.00018389884763124202, + "loss": 0.9172, + "step": 505 + }, + { + "epoch": 0.4, + "learning_rate": 0.00018386683738796415, + "loss": 0.7786, + "step": 506 + }, + { + "epoch": 0.41, + "learning_rate": 0.0001838348271446863, + "loss": 0.8414, + "step": 507 + }, + { + "epoch": 0.41, + "learning_rate": 0.00018380281690140847, + "loss": 0.8636, + "step": 508 + }, + { + "epoch": 0.41, + "learning_rate": 0.0001837708066581306, + "loss": 0.829, + "step": 509 + }, + { + "epoch": 0.41, + "learning_rate": 0.00018373879641485276, + "loss": 0.7562, + "step": 510 + }, + { + "epoch": 0.41, + "learning_rate": 0.0001837067861715749, + "loss": 0.9098, + "step": 511 + }, + { + "epoch": 0.41, + "learning_rate": 0.00018367477592829708, + "loss": 0.8473, + "step": 512 + }, + { + "epoch": 0.41, + "learning_rate": 0.00018364276568501921, + "loss": 0.7459, + "step": 513 + }, + { + "epoch": 0.41, + "learning_rate": 0.00018361075544174135, + "loss": 0.8395, + "step": 514 + }, + { + "epoch": 0.41, + "learning_rate": 0.0001835787451984635, + "loss": 0.7941, + "step": 515 + }, + { + "epoch": 0.41, + "learning_rate": 0.00018354673495518567, + "loss": 0.8841, + "step": 516 + }, + { + "epoch": 0.41, + "learning_rate": 0.00018351472471190783, + "loss": 0.713, + "step": 517 + }, + { + "epoch": 0.41, + "learning_rate": 0.00018348271446862996, + "loss": 0.7585, + "step": 518 + }, + { + "epoch": 0.42, + "learning_rate": 0.00018345070422535212, + "loss": 0.8871, + "step": 519 + }, + { + "epoch": 0.42, + "learning_rate": 0.00018341869398207428, + "loss": 0.7748, + "step": 520 + }, + { + "epoch": 0.42, + "learning_rate": 0.0001833866837387964, + "loss": 0.874, + "step": 521 + }, + { + "epoch": 0.42, + "learning_rate": 0.00018335467349551857, + "loss": 0.8121, + "step": 522 + }, + { + "epoch": 0.42, + "learning_rate": 0.00018332266325224073, + "loss": 0.9474, + "step": 523 + }, + { + "epoch": 0.42, + "learning_rate": 0.0001832906530089629, + "loss": 0.8082, + "step": 524 + }, + { + "epoch": 0.42, + "learning_rate": 0.00018325864276568502, + "loss": 0.7965, + "step": 525 + }, + { + "epoch": 0.42, + "learning_rate": 0.00018322663252240718, + "loss": 0.7245, + "step": 526 + }, + { + "epoch": 0.42, + "learning_rate": 0.00018319462227912934, + "loss": 0.8257, + "step": 527 + }, + { + "epoch": 0.42, + "learning_rate": 0.00018316261203585148, + "loss": 0.7629, + "step": 528 + }, + { + "epoch": 0.42, + "learning_rate": 0.00018313060179257364, + "loss": 0.8382, + "step": 529 + }, + { + "epoch": 0.42, + "learning_rate": 0.0001830985915492958, + "loss": 0.8895, + "step": 530 + }, + { + "epoch": 0.42, + "learning_rate": 0.00018306658130601793, + "loss": 0.8657, + "step": 531 + }, + { + "epoch": 0.43, + "learning_rate": 0.0001830345710627401, + "loss": 0.803, + "step": 532 + }, + { + "epoch": 0.43, + "learning_rate": 0.00018300256081946222, + "loss": 0.8395, + "step": 533 + }, + { + "epoch": 0.43, + "learning_rate": 0.0001829705505761844, + "loss": 0.8885, + "step": 534 + }, + { + "epoch": 0.43, + "learning_rate": 0.00018293854033290654, + "loss": 0.9239, + "step": 535 + }, + { + "epoch": 0.43, + "learning_rate": 0.00018290653008962867, + "loss": 0.8181, + "step": 536 + }, + { + "epoch": 0.43, + "learning_rate": 0.00018287451984635083, + "loss": 0.8911, + "step": 537 + }, + { + "epoch": 0.43, + "learning_rate": 0.000182842509603073, + "loss": 0.9071, + "step": 538 + }, + { + "epoch": 0.43, + "learning_rate": 0.00018281049935979515, + "loss": 0.7655, + "step": 539 + }, + { + "epoch": 0.43, + "learning_rate": 0.00018277848911651728, + "loss": 0.8869, + "step": 540 + }, + { + "epoch": 0.43, + "learning_rate": 0.00018274647887323944, + "loss": 0.7825, + "step": 541 + }, + { + "epoch": 0.43, + "learning_rate": 0.0001827144686299616, + "loss": 0.8369, + "step": 542 + }, + { + "epoch": 0.43, + "learning_rate": 0.00018268245838668374, + "loss": 0.8596, + "step": 543 + }, + { + "epoch": 0.44, + "learning_rate": 0.0001826504481434059, + "loss": 0.7925, + "step": 544 + }, + { + "epoch": 0.44, + "learning_rate": 0.00018261843790012806, + "loss": 0.9609, + "step": 545 + }, + { + "epoch": 0.44, + "learning_rate": 0.00018258642765685022, + "loss": 0.8913, + "step": 546 + }, + { + "epoch": 0.44, + "learning_rate": 0.00018255441741357235, + "loss": 0.7056, + "step": 547 + }, + { + "epoch": 0.44, + "learning_rate": 0.0001825224071702945, + "loss": 0.9277, + "step": 548 + }, + { + "epoch": 0.44, + "learning_rate": 0.00018249039692701667, + "loss": 0.8671, + "step": 549 + }, + { + "epoch": 0.44, + "learning_rate": 0.0001824583866837388, + "loss": 0.7622, + "step": 550 + }, + { + "epoch": 0.44, + "learning_rate": 0.00018242637644046096, + "loss": 0.8714, + "step": 551 + }, + { + "epoch": 0.44, + "learning_rate": 0.00018239436619718312, + "loss": 0.7377, + "step": 552 + }, + { + "epoch": 0.44, + "learning_rate": 0.00018236235595390525, + "loss": 0.7515, + "step": 553 + }, + { + "epoch": 0.44, + "learning_rate": 0.0001823303457106274, + "loss": 0.8823, + "step": 554 + }, + { + "epoch": 0.44, + "learning_rate": 0.00018229833546734955, + "loss": 0.8167, + "step": 555 + }, + { + "epoch": 0.44, + "learning_rate": 0.00018226632522407173, + "loss": 0.9988, + "step": 556 + }, + { + "epoch": 0.45, + "learning_rate": 0.00018223431498079387, + "loss": 0.94, + "step": 557 + }, + { + "epoch": 0.45, + "learning_rate": 0.000182202304737516, + "loss": 0.8409, + "step": 558 + }, + { + "epoch": 0.45, + "learning_rate": 0.00018217029449423816, + "loss": 0.7887, + "step": 559 + }, + { + "epoch": 0.45, + "learning_rate": 0.00018213828425096032, + "loss": 0.8213, + "step": 560 + }, + { + "epoch": 0.45, + "learning_rate": 0.00018210627400768248, + "loss": 0.8411, + "step": 561 + }, + { + "epoch": 0.45, + "learning_rate": 0.0001820742637644046, + "loss": 0.8791, + "step": 562 + }, + { + "epoch": 0.45, + "learning_rate": 0.00018204225352112677, + "loss": 0.8165, + "step": 563 + }, + { + "epoch": 0.45, + "learning_rate": 0.00018201024327784893, + "loss": 0.9362, + "step": 564 + }, + { + "epoch": 0.45, + "learning_rate": 0.00018197823303457106, + "loss": 0.8369, + "step": 565 + }, + { + "epoch": 0.45, + "learning_rate": 0.00018194622279129322, + "loss": 0.8853, + "step": 566 + }, + { + "epoch": 0.45, + "learning_rate": 0.00018191421254801538, + "loss": 0.8202, + "step": 567 + }, + { + "epoch": 0.45, + "learning_rate": 0.00018188220230473751, + "loss": 0.7873, + "step": 568 + }, + { + "epoch": 0.46, + "learning_rate": 0.00018185019206145967, + "loss": 0.8558, + "step": 569 + }, + { + "epoch": 0.46, + "learning_rate": 0.00018181818181818183, + "loss": 0.8611, + "step": 570 + }, + { + "epoch": 0.46, + "learning_rate": 0.000181786171574904, + "loss": 0.9649, + "step": 571 + }, + { + "epoch": 0.46, + "learning_rate": 0.00018175416133162613, + "loss": 0.8515, + "step": 572 + }, + { + "epoch": 0.46, + "learning_rate": 0.00018172215108834826, + "loss": 0.9032, + "step": 573 + }, + { + "epoch": 0.46, + "learning_rate": 0.00018169014084507045, + "loss": 0.8443, + "step": 574 + }, + { + "epoch": 0.46, + "learning_rate": 0.00018165813060179258, + "loss": 0.784, + "step": 575 + }, + { + "epoch": 0.46, + "learning_rate": 0.00018162612035851474, + "loss": 0.8274, + "step": 576 + }, + { + "epoch": 0.46, + "learning_rate": 0.00018159411011523687, + "loss": 0.7932, + "step": 577 + }, + { + "epoch": 0.46, + "learning_rate": 0.00018156209987195906, + "loss": 0.7656, + "step": 578 + }, + { + "epoch": 0.46, + "learning_rate": 0.0001815300896286812, + "loss": 0.8199, + "step": 579 + }, + { + "epoch": 0.46, + "learning_rate": 0.00018149807938540332, + "loss": 0.8427, + "step": 580 + }, + { + "epoch": 0.46, + "learning_rate": 0.00018146606914212548, + "loss": 0.8398, + "step": 581 + }, + { + "epoch": 0.47, + "learning_rate": 0.00018143405889884764, + "loss": 0.7899, + "step": 582 + }, + { + "epoch": 0.47, + "learning_rate": 0.0001814020486555698, + "loss": 0.8586, + "step": 583 + }, + { + "epoch": 0.47, + "learning_rate": 0.00018137003841229194, + "loss": 0.7978, + "step": 584 + }, + { + "epoch": 0.47, + "learning_rate": 0.0001813380281690141, + "loss": 0.8531, + "step": 585 + }, + { + "epoch": 0.47, + "learning_rate": 0.00018130601792573626, + "loss": 0.8284, + "step": 586 + }, + { + "epoch": 0.47, + "learning_rate": 0.0001812740076824584, + "loss": 0.9183, + "step": 587 + }, + { + "epoch": 0.47, + "learning_rate": 0.00018124199743918055, + "loss": 0.6815, + "step": 588 + }, + { + "epoch": 0.47, + "learning_rate": 0.0001812099871959027, + "loss": 0.7949, + "step": 589 + }, + { + "epoch": 0.47, + "learning_rate": 0.00018117797695262484, + "loss": 0.8758, + "step": 590 + }, + { + "epoch": 0.47, + "learning_rate": 0.000181145966709347, + "loss": 0.8122, + "step": 591 + }, + { + "epoch": 0.47, + "learning_rate": 0.00018111395646606913, + "loss": 0.9532, + "step": 592 + }, + { + "epoch": 0.47, + "learning_rate": 0.00018108194622279132, + "loss": 0.7524, + "step": 593 + }, + { + "epoch": 0.48, + "learning_rate": 0.00018104993597951345, + "loss": 0.827, + "step": 594 + }, + { + "epoch": 0.48, + "learning_rate": 0.00018101792573623558, + "loss": 0.7709, + "step": 595 + }, + { + "epoch": 0.48, + "learning_rate": 0.00018098591549295774, + "loss": 0.8297, + "step": 596 + }, + { + "epoch": 0.48, + "learning_rate": 0.0001809539052496799, + "loss": 0.8368, + "step": 597 + }, + { + "epoch": 0.48, + "learning_rate": 0.00018092189500640206, + "loss": 0.8493, + "step": 598 + }, + { + "epoch": 0.48, + "learning_rate": 0.0001808898847631242, + "loss": 0.7531, + "step": 599 + }, + { + "epoch": 0.48, + "learning_rate": 0.00018085787451984636, + "loss": 0.9589, + "step": 600 + }, + { + "epoch": 0.48, + "learning_rate": 0.00018082586427656852, + "loss": 0.8053, + "step": 601 + }, + { + "epoch": 0.48, + "learning_rate": 0.00018079385403329065, + "loss": 0.9307, + "step": 602 + }, + { + "epoch": 0.48, + "learning_rate": 0.0001807618437900128, + "loss": 0.7764, + "step": 603 + }, + { + "epoch": 0.48, + "learning_rate": 0.00018072983354673497, + "loss": 0.7977, + "step": 604 + }, + { + "epoch": 0.48, + "learning_rate": 0.00018069782330345713, + "loss": 0.745, + "step": 605 + }, + { + "epoch": 0.48, + "learning_rate": 0.00018066581306017926, + "loss": 0.9461, + "step": 606 + }, + { + "epoch": 0.49, + "learning_rate": 0.00018063380281690142, + "loss": 0.8356, + "step": 607 + }, + { + "epoch": 0.49, + "learning_rate": 0.00018060179257362358, + "loss": 0.8973, + "step": 608 + }, + { + "epoch": 0.49, + "learning_rate": 0.0001805697823303457, + "loss": 0.9058, + "step": 609 + }, + { + "epoch": 0.49, + "learning_rate": 0.00018053777208706787, + "loss": 0.8199, + "step": 610 + }, + { + "epoch": 0.49, + "learning_rate": 0.00018050576184379003, + "loss": 0.9249, + "step": 611 + }, + { + "epoch": 0.49, + "learning_rate": 0.00018047375160051217, + "loss": 0.7857, + "step": 612 + }, + { + "epoch": 0.49, + "learning_rate": 0.00018044174135723433, + "loss": 0.8156, + "step": 613 + }, + { + "epoch": 0.49, + "learning_rate": 0.00018040973111395646, + "loss": 0.6936, + "step": 614 + }, + { + "epoch": 0.49, + "learning_rate": 0.00018037772087067865, + "loss": 0.8705, + "step": 615 + }, + { + "epoch": 0.49, + "learning_rate": 0.00018034571062740078, + "loss": 0.9653, + "step": 616 + }, + { + "epoch": 0.49, + "learning_rate": 0.0001803137003841229, + "loss": 0.8526, + "step": 617 + }, + { + "epoch": 0.49, + "learning_rate": 0.00018028169014084507, + "loss": 0.8326, + "step": 618 + }, + { + "epoch": 0.5, + "learning_rate": 0.00018024967989756723, + "loss": 0.8805, + "step": 619 + }, + { + "epoch": 0.5, + "learning_rate": 0.0001802176696542894, + "loss": 0.8862, + "step": 620 + }, + { + "epoch": 0.5, + "learning_rate": 0.00018018565941101152, + "loss": 0.8578, + "step": 621 + }, + { + "epoch": 0.5, + "learning_rate": 0.00018015364916773368, + "loss": 0.8099, + "step": 622 + }, + { + "epoch": 0.5, + "learning_rate": 0.00018012163892445584, + "loss": 0.8893, + "step": 623 + }, + { + "epoch": 0.5, + "learning_rate": 0.00018008962868117797, + "loss": 0.8373, + "step": 624 + }, + { + "epoch": 0.5, + "learning_rate": 0.00018005761843790013, + "loss": 0.8194, + "step": 625 + }, + { + "epoch": 0.5, + "learning_rate": 0.0001800256081946223, + "loss": 0.879, + "step": 626 + }, + { + "epoch": 0.5, + "learning_rate": 0.00017999359795134443, + "loss": 1.058, + "step": 627 + }, + { + "epoch": 0.5, + "learning_rate": 0.0001799615877080666, + "loss": 0.9124, + "step": 628 + }, + { + "epoch": 0.5, + "learning_rate": 0.00017992957746478875, + "loss": 0.8791, + "step": 629 + }, + { + "epoch": 0.5, + "learning_rate": 0.0001798975672215109, + "loss": 0.8692, + "step": 630 + }, + { + "epoch": 0.5, + "learning_rate": 0.00017986555697823304, + "loss": 0.8888, + "step": 631 + }, + { + "epoch": 0.51, + "learning_rate": 0.0001798335467349552, + "loss": 0.9036, + "step": 632 + }, + { + "epoch": 0.51, + "learning_rate": 0.00017980153649167736, + "loss": 0.9481, + "step": 633 + }, + { + "epoch": 0.51, + "learning_rate": 0.0001797695262483995, + "loss": 0.9276, + "step": 634 + }, + { + "epoch": 0.51, + "learning_rate": 0.00017973751600512165, + "loss": 0.7706, + "step": 635 + }, + { + "epoch": 0.51, + "learning_rate": 0.00017970550576184378, + "loss": 0.9047, + "step": 636 + }, + { + "epoch": 0.51, + "learning_rate": 0.00017967349551856597, + "loss": 0.9271, + "step": 637 + }, + { + "epoch": 0.51, + "learning_rate": 0.0001796414852752881, + "loss": 0.9238, + "step": 638 + }, + { + "epoch": 0.51, + "learning_rate": 0.00017960947503201024, + "loss": 0.8489, + "step": 639 + }, + { + "epoch": 0.51, + "learning_rate": 0.0001795774647887324, + "loss": 0.8193, + "step": 640 + }, + { + "epoch": 0.51, + "learning_rate": 0.00017954545454545456, + "loss": 0.7161, + "step": 641 + }, + { + "epoch": 0.51, + "learning_rate": 0.00017951344430217672, + "loss": 0.7997, + "step": 642 + }, + { + "epoch": 0.51, + "learning_rate": 0.00017948143405889885, + "loss": 0.9374, + "step": 643 + }, + { + "epoch": 0.52, + "learning_rate": 0.000179449423815621, + "loss": 0.8747, + "step": 644 + }, + { + "epoch": 0.52, + "learning_rate": 0.00017941741357234317, + "loss": 0.9518, + "step": 645 + }, + { + "epoch": 0.52, + "learning_rate": 0.0001793854033290653, + "loss": 0.7346, + "step": 646 + }, + { + "epoch": 0.52, + "learning_rate": 0.00017935339308578746, + "loss": 0.7847, + "step": 647 + }, + { + "epoch": 0.52, + "learning_rate": 0.00017932138284250962, + "loss": 0.8505, + "step": 648 + }, + { + "epoch": 0.52, + "learning_rate": 0.00017928937259923175, + "loss": 0.7663, + "step": 649 + }, + { + "epoch": 0.52, + "learning_rate": 0.0001792573623559539, + "loss": 0.7691, + "step": 650 + }, + { + "epoch": 0.52, + "learning_rate": 0.00017922535211267607, + "loss": 0.9292, + "step": 651 + }, + { + "epoch": 0.52, + "learning_rate": 0.00017919334186939823, + "loss": 0.8677, + "step": 652 + }, + { + "epoch": 0.52, + "learning_rate": 0.00017916133162612036, + "loss": 0.7725, + "step": 653 + }, + { + "epoch": 0.52, + "learning_rate": 0.0001791293213828425, + "loss": 0.8182, + "step": 654 + }, + { + "epoch": 0.52, + "learning_rate": 0.00017909731113956468, + "loss": 0.7939, + "step": 655 + }, + { + "epoch": 0.52, + "learning_rate": 0.00017906530089628682, + "loss": 0.8192, + "step": 656 + }, + { + "epoch": 0.53, + "learning_rate": 0.00017903329065300898, + "loss": 0.773, + "step": 657 + }, + { + "epoch": 0.53, + "learning_rate": 0.0001790012804097311, + "loss": 0.8604, + "step": 658 + }, + { + "epoch": 0.53, + "learning_rate": 0.0001789692701664533, + "loss": 0.8956, + "step": 659 + }, + { + "epoch": 0.53, + "learning_rate": 0.00017893725992317543, + "loss": 0.8061, + "step": 660 + }, + { + "epoch": 0.53, + "learning_rate": 0.00017890524967989756, + "loss": 0.8825, + "step": 661 + }, + { + "epoch": 0.53, + "learning_rate": 0.00017887323943661972, + "loss": 0.8392, + "step": 662 + }, + { + "epoch": 0.53, + "learning_rate": 0.00017884122919334188, + "loss": 0.7899, + "step": 663 + }, + { + "epoch": 0.53, + "learning_rate": 0.00017880921895006404, + "loss": 0.7923, + "step": 664 + }, + { + "epoch": 0.53, + "learning_rate": 0.00017877720870678617, + "loss": 0.8045, + "step": 665 + }, + { + "epoch": 0.53, + "learning_rate": 0.00017874519846350833, + "loss": 0.8158, + "step": 666 + }, + { + "epoch": 0.53, + "learning_rate": 0.0001787131882202305, + "loss": 0.8124, + "step": 667 + }, + { + "epoch": 0.53, + "learning_rate": 0.00017868117797695263, + "loss": 0.7424, + "step": 668 + }, + { + "epoch": 0.54, + "learning_rate": 0.00017864916773367479, + "loss": 0.8094, + "step": 669 + }, + { + "epoch": 0.54, + "learning_rate": 0.00017861715749039695, + "loss": 0.7541, + "step": 670 + }, + { + "epoch": 0.54, + "learning_rate": 0.00017858514724711908, + "loss": 0.775, + "step": 671 + }, + { + "epoch": 0.54, + "learning_rate": 0.00017855313700384124, + "loss": 0.8262, + "step": 672 + }, + { + "epoch": 0.54, + "learning_rate": 0.0001785211267605634, + "loss": 0.8062, + "step": 673 + }, + { + "epoch": 0.54, + "learning_rate": 0.00017848911651728556, + "loss": 0.8069, + "step": 674 + }, + { + "epoch": 0.54, + "learning_rate": 0.0001784571062740077, + "loss": 0.8423, + "step": 675 + }, + { + "epoch": 0.54, + "learning_rate": 0.00017842509603072982, + "loss": 0.7926, + "step": 676 + }, + { + "epoch": 0.54, + "learning_rate": 0.000178393085787452, + "loss": 0.6658, + "step": 677 + }, + { + "epoch": 0.54, + "learning_rate": 0.00017836107554417414, + "loss": 0.7671, + "step": 678 + }, + { + "epoch": 0.54, + "learning_rate": 0.0001783290653008963, + "loss": 0.781, + "step": 679 + }, + { + "epoch": 0.54, + "learning_rate": 0.00017829705505761843, + "loss": 0.8081, + "step": 680 + }, + { + "epoch": 0.54, + "learning_rate": 0.0001782650448143406, + "loss": 0.9887, + "step": 681 + }, + { + "epoch": 0.55, + "learning_rate": 0.00017823303457106275, + "loss": 0.9639, + "step": 682 + }, + { + "epoch": 0.55, + "learning_rate": 0.0001782010243277849, + "loss": 0.8063, + "step": 683 + }, + { + "epoch": 0.55, + "learning_rate": 0.00017816901408450705, + "loss": 0.827, + "step": 684 + }, + { + "epoch": 0.55, + "learning_rate": 0.0001781370038412292, + "loss": 0.8054, + "step": 685 + }, + { + "epoch": 0.55, + "learning_rate": 0.00017810499359795137, + "loss": 0.8743, + "step": 686 + }, + { + "epoch": 0.55, + "learning_rate": 0.0001780729833546735, + "loss": 0.8617, + "step": 687 + }, + { + "epoch": 0.55, + "learning_rate": 0.00017804097311139566, + "loss": 0.7558, + "step": 688 + }, + { + "epoch": 0.55, + "learning_rate": 0.00017800896286811782, + "loss": 0.8937, + "step": 689 + }, + { + "epoch": 0.55, + "learning_rate": 0.00017797695262483995, + "loss": 0.8778, + "step": 690 + }, + { + "epoch": 0.55, + "learning_rate": 0.0001779449423815621, + "loss": 0.8409, + "step": 691 + }, + { + "epoch": 0.55, + "learning_rate": 0.00017791293213828427, + "loss": 0.7335, + "step": 692 + }, + { + "epoch": 0.55, + "learning_rate": 0.0001778809218950064, + "loss": 0.8858, + "step": 693 + }, + { + "epoch": 0.56, + "learning_rate": 0.00017784891165172856, + "loss": 0.8102, + "step": 694 + }, + { + "epoch": 0.56, + "learning_rate": 0.0001778169014084507, + "loss": 0.9256, + "step": 695 + }, + { + "epoch": 0.56, + "learning_rate": 0.00017778489116517288, + "loss": 0.8711, + "step": 696 + }, + { + "epoch": 0.56, + "learning_rate": 0.00017775288092189502, + "loss": 0.9271, + "step": 697 + }, + { + "epoch": 0.56, + "learning_rate": 0.00017772087067861715, + "loss": 0.8565, + "step": 698 + }, + { + "epoch": 0.56, + "learning_rate": 0.0001776888604353393, + "loss": 0.8492, + "step": 699 + }, + { + "epoch": 0.56, + "learning_rate": 0.00017765685019206147, + "loss": 0.8809, + "step": 700 + }, + { + "epoch": 0.56, + "learning_rate": 0.00017762483994878363, + "loss": 0.8059, + "step": 701 + }, + { + "epoch": 0.56, + "learning_rate": 0.00017759282970550576, + "loss": 0.8576, + "step": 702 + }, + { + "epoch": 0.56, + "learning_rate": 0.00017756081946222792, + "loss": 0.9853, + "step": 703 + }, + { + "epoch": 0.56, + "learning_rate": 0.00017752880921895008, + "loss": 0.787, + "step": 704 + }, + { + "epoch": 0.56, + "learning_rate": 0.0001774967989756722, + "loss": 0.8237, + "step": 705 + }, + { + "epoch": 0.56, + "learning_rate": 0.00017746478873239437, + "loss": 0.8674, + "step": 706 + }, + { + "epoch": 0.57, + "learning_rate": 0.00017743277848911653, + "loss": 0.8336, + "step": 707 + }, + { + "epoch": 0.57, + "learning_rate": 0.00017740076824583866, + "loss": 0.8832, + "step": 708 + }, + { + "epoch": 0.57, + "learning_rate": 0.00017736875800256082, + "loss": 0.7301, + "step": 709 + }, + { + "epoch": 0.57, + "learning_rate": 0.00017733674775928298, + "loss": 0.7894, + "step": 710 + }, + { + "epoch": 0.57, + "learning_rate": 0.00017730473751600514, + "loss": 0.769, + "step": 711 + }, + { + "epoch": 0.57, + "learning_rate": 0.00017727272727272728, + "loss": 0.7709, + "step": 712 + }, + { + "epoch": 0.57, + "learning_rate": 0.0001772407170294494, + "loss": 0.9674, + "step": 713 + }, + { + "epoch": 0.57, + "learning_rate": 0.0001772087067861716, + "loss": 0.7916, + "step": 714 + }, + { + "epoch": 0.57, + "learning_rate": 0.00017717669654289373, + "loss": 0.7017, + "step": 715 + }, + { + "epoch": 0.57, + "learning_rate": 0.0001771446862996159, + "loss": 0.7493, + "step": 716 + }, + { + "epoch": 0.57, + "learning_rate": 0.00017711267605633802, + "loss": 0.8, + "step": 717 + }, + { + "epoch": 0.57, + "learning_rate": 0.0001770806658130602, + "loss": 0.776, + "step": 718 + }, + { + "epoch": 0.58, + "learning_rate": 0.00017704865556978234, + "loss": 1.0176, + "step": 719 + }, + { + "epoch": 0.58, + "learning_rate": 0.00017701664532650447, + "loss": 0.7593, + "step": 720 + }, + { + "epoch": 0.58, + "learning_rate": 0.00017698463508322663, + "loss": 0.8618, + "step": 721 + }, + { + "epoch": 0.58, + "learning_rate": 0.0001769526248399488, + "loss": 0.7726, + "step": 722 + }, + { + "epoch": 0.58, + "learning_rate": 0.00017692061459667095, + "loss": 0.8271, + "step": 723 + }, + { + "epoch": 0.58, + "learning_rate": 0.00017688860435339309, + "loss": 0.8466, + "step": 724 + }, + { + "epoch": 0.58, + "learning_rate": 0.00017685659411011525, + "loss": 0.789, + "step": 725 + }, + { + "epoch": 0.58, + "learning_rate": 0.0001768245838668374, + "loss": 0.8271, + "step": 726 + }, + { + "epoch": 0.58, + "learning_rate": 0.00017679257362355954, + "loss": 0.7825, + "step": 727 + }, + { + "epoch": 0.58, + "learning_rate": 0.0001767605633802817, + "loss": 0.7793, + "step": 728 + }, + { + "epoch": 0.58, + "learning_rate": 0.00017672855313700386, + "loss": 0.8455, + "step": 729 + }, + { + "epoch": 0.58, + "learning_rate": 0.000176696542893726, + "loss": 0.8552, + "step": 730 + }, + { + "epoch": 0.58, + "learning_rate": 0.00017666453265044815, + "loss": 0.8997, + "step": 731 + }, + { + "epoch": 0.59, + "learning_rate": 0.0001766325224071703, + "loss": 0.8064, + "step": 732 + }, + { + "epoch": 0.59, + "learning_rate": 0.00017660051216389247, + "loss": 0.7565, + "step": 733 + }, + { + "epoch": 0.59, + "learning_rate": 0.0001765685019206146, + "loss": 0.9292, + "step": 734 + }, + { + "epoch": 0.59, + "learning_rate": 0.00017653649167733674, + "loss": 0.8543, + "step": 735 + }, + { + "epoch": 0.59, + "learning_rate": 0.00017650448143405892, + "loss": 0.6477, + "step": 736 + }, + { + "epoch": 0.59, + "learning_rate": 0.00017647247119078105, + "loss": 0.8989, + "step": 737 + }, + { + "epoch": 0.59, + "learning_rate": 0.00017644046094750321, + "loss": 0.8051, + "step": 738 + }, + { + "epoch": 0.59, + "learning_rate": 0.00017640845070422535, + "loss": 0.8427, + "step": 739 + }, + { + "epoch": 0.59, + "learning_rate": 0.0001763764404609475, + "loss": 0.8539, + "step": 740 + }, + { + "epoch": 0.59, + "learning_rate": 0.00017634443021766967, + "loss": 0.8112, + "step": 741 + }, + { + "epoch": 0.59, + "learning_rate": 0.0001763124199743918, + "loss": 0.8282, + "step": 742 + }, + { + "epoch": 0.59, + "learning_rate": 0.00017628040973111396, + "loss": 0.7741, + "step": 743 + }, + { + "epoch": 0.6, + "learning_rate": 0.00017624839948783612, + "loss": 0.8012, + "step": 744 + }, + { + "epoch": 0.6, + "learning_rate": 0.00017621638924455828, + "loss": 0.8223, + "step": 745 + }, + { + "epoch": 0.6, + "learning_rate": 0.0001761843790012804, + "loss": 0.9167, + "step": 746 + }, + { + "epoch": 0.6, + "learning_rate": 0.00017615236875800257, + "loss": 0.8001, + "step": 747 + }, + { + "epoch": 0.6, + "learning_rate": 0.00017612035851472473, + "loss": 0.8137, + "step": 748 + }, + { + "epoch": 0.6, + "learning_rate": 0.00017608834827144686, + "loss": 0.8057, + "step": 749 + }, + { + "epoch": 0.6, + "learning_rate": 0.00017605633802816902, + "loss": 0.8505, + "step": 750 + }, + { + "epoch": 0.6, + "learning_rate": 0.00017602432778489118, + "loss": 0.8453, + "step": 751 + }, + { + "epoch": 0.6, + "learning_rate": 0.00017599231754161332, + "loss": 0.9007, + "step": 752 + }, + { + "epoch": 0.6, + "learning_rate": 0.00017596030729833548, + "loss": 0.734, + "step": 753 + }, + { + "epoch": 0.6, + "learning_rate": 0.00017592829705505764, + "loss": 0.8057, + "step": 754 + }, + { + "epoch": 0.6, + "learning_rate": 0.0001758962868117798, + "loss": 0.8516, + "step": 755 + }, + { + "epoch": 0.6, + "learning_rate": 0.00017586427656850193, + "loss": 0.8686, + "step": 756 + }, + { + "epoch": 0.61, + "learning_rate": 0.00017583226632522406, + "loss": 0.812, + "step": 757 + }, + { + "epoch": 0.61, + "learning_rate": 0.00017580025608194625, + "loss": 0.8649, + "step": 758 + }, + { + "epoch": 0.61, + "learning_rate": 0.00017576824583866838, + "loss": 0.8142, + "step": 759 + }, + { + "epoch": 0.61, + "learning_rate": 0.00017573623559539054, + "loss": 0.861, + "step": 760 + }, + { + "epoch": 0.61, + "learning_rate": 0.00017570422535211267, + "loss": 0.8238, + "step": 761 + }, + { + "epoch": 0.61, + "learning_rate": 0.00017567221510883483, + "loss": 0.8481, + "step": 762 + }, + { + "epoch": 0.61, + "learning_rate": 0.000175640204865557, + "loss": 0.7788, + "step": 763 + }, + { + "epoch": 0.61, + "learning_rate": 0.00017560819462227912, + "loss": 1.0263, + "step": 764 + }, + { + "epoch": 0.61, + "learning_rate": 0.00017557618437900128, + "loss": 0.9379, + "step": 765 + }, + { + "epoch": 0.61, + "learning_rate": 0.00017554417413572344, + "loss": 0.8304, + "step": 766 + }, + { + "epoch": 0.61, + "learning_rate": 0.00017551216389244558, + "loss": 0.8311, + "step": 767 + }, + { + "epoch": 0.61, + "learning_rate": 0.00017548015364916774, + "loss": 0.8748, + "step": 768 + }, + { + "epoch": 0.62, + "learning_rate": 0.0001754481434058899, + "loss": 0.7966, + "step": 769 + }, + { + "epoch": 0.62, + "learning_rate": 0.00017541613316261206, + "loss": 0.8614, + "step": 770 + }, + { + "epoch": 0.62, + "learning_rate": 0.0001753841229193342, + "loss": 0.7993, + "step": 771 + }, + { + "epoch": 0.62, + "learning_rate": 0.00017535211267605635, + "loss": 0.8851, + "step": 772 + }, + { + "epoch": 0.62, + "learning_rate": 0.0001753201024327785, + "loss": 0.7351, + "step": 773 + }, + { + "epoch": 0.62, + "learning_rate": 0.00017528809218950064, + "loss": 0.7081, + "step": 774 + }, + { + "epoch": 0.62, + "learning_rate": 0.0001752560819462228, + "loss": 0.8082, + "step": 775 + }, + { + "epoch": 0.62, + "learning_rate": 0.00017522407170294493, + "loss": 0.7792, + "step": 776 + }, + { + "epoch": 0.62, + "learning_rate": 0.00017519206145966712, + "loss": 0.9288, + "step": 777 + }, + { + "epoch": 0.62, + "learning_rate": 0.00017516005121638925, + "loss": 0.7971, + "step": 778 + }, + { + "epoch": 0.62, + "learning_rate": 0.00017512804097311139, + "loss": 0.9193, + "step": 779 + }, + { + "epoch": 0.62, + "learning_rate": 0.00017509603072983357, + "loss": 0.899, + "step": 780 + }, + { + "epoch": 0.62, + "learning_rate": 0.0001750640204865557, + "loss": 0.7891, + "step": 781 + }, + { + "epoch": 0.63, + "learning_rate": 0.00017503201024327787, + "loss": 0.8288, + "step": 782 + }, + { + "epoch": 0.63, + "learning_rate": 0.000175, + "loss": 0.8687, + "step": 783 + }, + { + "epoch": 0.63, + "learning_rate": 0.00017496798975672216, + "loss": 0.8708, + "step": 784 + }, + { + "epoch": 0.63, + "learning_rate": 0.00017493597951344432, + "loss": 0.833, + "step": 785 + }, + { + "epoch": 0.63, + "learning_rate": 0.00017490396927016645, + "loss": 0.9007, + "step": 786 + }, + { + "epoch": 0.63, + "learning_rate": 0.0001748719590268886, + "loss": 0.7291, + "step": 787 + }, + { + "epoch": 0.63, + "learning_rate": 0.00017483994878361077, + "loss": 0.8407, + "step": 788 + }, + { + "epoch": 0.63, + "learning_rate": 0.0001748079385403329, + "loss": 0.7835, + "step": 789 + }, + { + "epoch": 0.63, + "learning_rate": 0.00017477592829705506, + "loss": 0.8138, + "step": 790 + }, + { + "epoch": 0.63, + "learning_rate": 0.00017474391805377722, + "loss": 0.6527, + "step": 791 + }, + { + "epoch": 0.63, + "learning_rate": 0.00017471190781049938, + "loss": 0.7952, + "step": 792 + }, + { + "epoch": 0.63, + "learning_rate": 0.00017467989756722151, + "loss": 0.7749, + "step": 793 + }, + { + "epoch": 0.64, + "learning_rate": 0.00017464788732394365, + "loss": 0.7828, + "step": 794 + }, + { + "epoch": 0.64, + "learning_rate": 0.00017461587708066583, + "loss": 0.8516, + "step": 795 + }, + { + "epoch": 0.64, + "learning_rate": 0.00017458386683738797, + "loss": 0.9804, + "step": 796 + }, + { + "epoch": 0.64, + "learning_rate": 0.00017455185659411013, + "loss": 0.7898, + "step": 797 + }, + { + "epoch": 0.64, + "learning_rate": 0.00017451984635083226, + "loss": 0.7455, + "step": 798 + }, + { + "epoch": 0.64, + "learning_rate": 0.00017448783610755445, + "loss": 0.9052, + "step": 799 + }, + { + "epoch": 0.64, + "learning_rate": 0.00017445582586427658, + "loss": 0.9169, + "step": 800 + }, + { + "epoch": 0.64, + "learning_rate": 0.0001744238156209987, + "loss": 0.892, + "step": 801 + }, + { + "epoch": 0.64, + "learning_rate": 0.00017439180537772087, + "loss": 0.7793, + "step": 802 + }, + { + "epoch": 0.64, + "learning_rate": 0.00017435979513444303, + "loss": 0.8782, + "step": 803 + }, + { + "epoch": 0.64, + "learning_rate": 0.0001743277848911652, + "loss": 0.7318, + "step": 804 + }, + { + "epoch": 0.64, + "learning_rate": 0.00017429577464788732, + "loss": 0.8258, + "step": 805 + }, + { + "epoch": 0.64, + "learning_rate": 0.00017426376440460948, + "loss": 0.8326, + "step": 806 + }, + { + "epoch": 0.65, + "learning_rate": 0.00017423175416133164, + "loss": 0.8562, + "step": 807 + }, + { + "epoch": 0.65, + "learning_rate": 0.00017419974391805378, + "loss": 0.8148, + "step": 808 + }, + { + "epoch": 0.65, + "learning_rate": 0.00017416773367477594, + "loss": 1.0542, + "step": 809 + }, + { + "epoch": 0.65, + "learning_rate": 0.0001741357234314981, + "loss": 0.7881, + "step": 810 + }, + { + "epoch": 0.65, + "learning_rate": 0.00017410371318822023, + "loss": 0.8953, + "step": 811 + }, + { + "epoch": 0.65, + "learning_rate": 0.0001740717029449424, + "loss": 0.7827, + "step": 812 + }, + { + "epoch": 0.65, + "learning_rate": 0.00017403969270166455, + "loss": 0.846, + "step": 813 + }, + { + "epoch": 0.65, + "learning_rate": 0.0001740076824583867, + "loss": 0.7435, + "step": 814 + }, + { + "epoch": 0.65, + "learning_rate": 0.00017397567221510884, + "loss": 0.7462, + "step": 815 + }, + { + "epoch": 0.65, + "learning_rate": 0.00017394366197183097, + "loss": 0.7516, + "step": 816 + }, + { + "epoch": 0.65, + "learning_rate": 0.00017391165172855316, + "loss": 0.8499, + "step": 817 + }, + { + "epoch": 0.65, + "learning_rate": 0.0001738796414852753, + "loss": 0.8667, + "step": 818 + }, + { + "epoch": 0.66, + "learning_rate": 0.00017384763124199745, + "loss": 0.8456, + "step": 819 + }, + { + "epoch": 0.66, + "learning_rate": 0.00017381562099871959, + "loss": 0.6986, + "step": 820 + }, + { + "epoch": 0.66, + "learning_rate": 0.00017378361075544174, + "loss": 0.9566, + "step": 821 + }, + { + "epoch": 0.66, + "learning_rate": 0.0001737516005121639, + "loss": 0.8834, + "step": 822 + }, + { + "epoch": 0.66, + "learning_rate": 0.00017371959026888604, + "loss": 0.7587, + "step": 823 + }, + { + "epoch": 0.66, + "learning_rate": 0.0001736875800256082, + "loss": 0.8363, + "step": 824 + }, + { + "epoch": 0.66, + "learning_rate": 0.00017365556978233036, + "loss": 0.8066, + "step": 825 + }, + { + "epoch": 0.66, + "learning_rate": 0.00017362355953905252, + "loss": 0.8791, + "step": 826 + }, + { + "epoch": 0.66, + "learning_rate": 0.00017359154929577465, + "loss": 0.9008, + "step": 827 + }, + { + "epoch": 0.66, + "learning_rate": 0.0001735595390524968, + "loss": 0.8289, + "step": 828 + }, + { + "epoch": 0.66, + "learning_rate": 0.00017352752880921897, + "loss": 0.8363, + "step": 829 + }, + { + "epoch": 0.66, + "learning_rate": 0.0001734955185659411, + "loss": 0.8517, + "step": 830 + }, + { + "epoch": 0.66, + "learning_rate": 0.00017346350832266326, + "loss": 0.8568, + "step": 831 + }, + { + "epoch": 0.67, + "learning_rate": 0.00017343149807938542, + "loss": 0.7404, + "step": 832 + }, + { + "epoch": 0.67, + "learning_rate": 0.00017339948783610755, + "loss": 0.8624, + "step": 833 + }, + { + "epoch": 0.67, + "learning_rate": 0.00017336747759282971, + "loss": 0.8341, + "step": 834 + }, + { + "epoch": 0.67, + "learning_rate": 0.00017333546734955187, + "loss": 0.7741, + "step": 835 + }, + { + "epoch": 0.67, + "learning_rate": 0.00017330345710627403, + "loss": 0.875, + "step": 836 + }, + { + "epoch": 0.67, + "learning_rate": 0.00017327144686299617, + "loss": 0.8368, + "step": 837 + }, + { + "epoch": 0.67, + "learning_rate": 0.0001732394366197183, + "loss": 0.7684, + "step": 838 + }, + { + "epoch": 0.67, + "learning_rate": 0.00017320742637644049, + "loss": 0.8006, + "step": 839 + }, + { + "epoch": 0.67, + "learning_rate": 0.00017317541613316262, + "loss": 0.891, + "step": 840 + }, + { + "epoch": 0.67, + "learning_rate": 0.00017314340588988478, + "loss": 0.8282, + "step": 841 + }, + { + "epoch": 0.67, + "learning_rate": 0.0001731113956466069, + "loss": 0.7962, + "step": 842 + }, + { + "epoch": 0.67, + "learning_rate": 0.00017307938540332907, + "loss": 0.9345, + "step": 843 + }, + { + "epoch": 0.68, + "learning_rate": 0.00017304737516005123, + "loss": 0.8022, + "step": 844 + }, + { + "epoch": 0.68, + "learning_rate": 0.00017301536491677336, + "loss": 0.735, + "step": 845 + }, + { + "epoch": 0.68, + "learning_rate": 0.00017298335467349552, + "loss": 0.9587, + "step": 846 + }, + { + "epoch": 0.68, + "learning_rate": 0.00017295134443021768, + "loss": 0.8252, + "step": 847 + }, + { + "epoch": 0.68, + "learning_rate": 0.00017291933418693982, + "loss": 0.7618, + "step": 848 + }, + { + "epoch": 0.68, + "learning_rate": 0.00017288732394366197, + "loss": 0.8104, + "step": 849 + }, + { + "epoch": 0.68, + "learning_rate": 0.00017285531370038413, + "loss": 0.8187, + "step": 850 + }, + { + "epoch": 0.68, + "learning_rate": 0.0001728233034571063, + "loss": 0.8169, + "step": 851 + }, + { + "epoch": 0.68, + "learning_rate": 0.00017279129321382843, + "loss": 0.8646, + "step": 852 + }, + { + "epoch": 0.68, + "learning_rate": 0.0001727592829705506, + "loss": 0.8413, + "step": 853 + }, + { + "epoch": 0.68, + "learning_rate": 0.00017272727272727275, + "loss": 0.7767, + "step": 854 + }, + { + "epoch": 0.68, + "learning_rate": 0.00017269526248399488, + "loss": 0.7317, + "step": 855 + }, + { + "epoch": 0.68, + "learning_rate": 0.00017266325224071704, + "loss": 0.7594, + "step": 856 + }, + { + "epoch": 0.69, + "learning_rate": 0.0001726312419974392, + "loss": 0.9051, + "step": 857 + }, + { + "epoch": 0.69, + "learning_rate": 0.00017259923175416136, + "loss": 0.7638, + "step": 858 + }, + { + "epoch": 0.69, + "learning_rate": 0.0001725672215108835, + "loss": 0.8661, + "step": 859 + }, + { + "epoch": 0.69, + "learning_rate": 0.00017253521126760562, + "loss": 0.8428, + "step": 860 + }, + { + "epoch": 0.69, + "learning_rate": 0.0001725032010243278, + "loss": 0.8634, + "step": 861 + }, + { + "epoch": 0.69, + "learning_rate": 0.00017247119078104994, + "loss": 0.8425, + "step": 862 + }, + { + "epoch": 0.69, + "learning_rate": 0.0001724391805377721, + "loss": 0.7275, + "step": 863 + }, + { + "epoch": 0.69, + "learning_rate": 0.00017240717029449424, + "loss": 0.9025, + "step": 864 + }, + { + "epoch": 0.69, + "learning_rate": 0.0001723751600512164, + "loss": 0.9377, + "step": 865 + }, + { + "epoch": 0.69, + "learning_rate": 0.00017234314980793856, + "loss": 0.8386, + "step": 866 + }, + { + "epoch": 0.69, + "learning_rate": 0.0001723111395646607, + "loss": 0.8123, + "step": 867 + }, + { + "epoch": 0.69, + "learning_rate": 0.00017227912932138285, + "loss": 0.7319, + "step": 868 + }, + { + "epoch": 0.7, + "learning_rate": 0.000172247119078105, + "loss": 0.7258, + "step": 869 + }, + { + "epoch": 0.7, + "learning_rate": 0.00017221510883482714, + "loss": 0.8995, + "step": 870 + }, + { + "epoch": 0.7, + "learning_rate": 0.0001721830985915493, + "loss": 0.7907, + "step": 871 + }, + { + "epoch": 0.7, + "learning_rate": 0.00017215108834827146, + "loss": 0.7364, + "step": 872 + }, + { + "epoch": 0.7, + "learning_rate": 0.00017211907810499362, + "loss": 0.7806, + "step": 873 + }, + { + "epoch": 0.7, + "learning_rate": 0.00017208706786171575, + "loss": 0.9809, + "step": 874 + }, + { + "epoch": 0.7, + "learning_rate": 0.00017205505761843789, + "loss": 0.8114, + "step": 875 + }, + { + "epoch": 0.7, + "learning_rate": 0.00017202304737516007, + "loss": 0.7692, + "step": 876 + }, + { + "epoch": 0.7, + "learning_rate": 0.0001719910371318822, + "loss": 0.9418, + "step": 877 + }, + { + "epoch": 0.7, + "learning_rate": 0.00017195902688860436, + "loss": 0.8379, + "step": 878 + }, + { + "epoch": 0.7, + "learning_rate": 0.0001719270166453265, + "loss": 0.8091, + "step": 879 + }, + { + "epoch": 0.7, + "learning_rate": 0.00017189500640204866, + "loss": 0.7287, + "step": 880 + }, + { + "epoch": 0.7, + "learning_rate": 0.00017186299615877082, + "loss": 0.8372, + "step": 881 + }, + { + "epoch": 0.71, + "learning_rate": 0.00017183098591549295, + "loss": 0.8117, + "step": 882 + }, + { + "epoch": 0.71, + "learning_rate": 0.00017179897567221514, + "loss": 0.9157, + "step": 883 + }, + { + "epoch": 0.71, + "learning_rate": 0.00017176696542893727, + "loss": 0.9549, + "step": 884 + }, + { + "epoch": 0.71, + "learning_rate": 0.00017173495518565943, + "loss": 0.7739, + "step": 885 + }, + { + "epoch": 0.71, + "learning_rate": 0.00017170294494238156, + "loss": 0.7775, + "step": 886 + }, + { + "epoch": 0.71, + "learning_rate": 0.00017167093469910372, + "loss": 0.7471, + "step": 887 + }, + { + "epoch": 0.71, + "learning_rate": 0.00017163892445582588, + "loss": 0.7691, + "step": 888 + }, + { + "epoch": 0.71, + "learning_rate": 0.00017160691421254801, + "loss": 0.9197, + "step": 889 + }, + { + "epoch": 0.71, + "learning_rate": 0.00017157490396927017, + "loss": 0.9203, + "step": 890 + }, + { + "epoch": 0.71, + "learning_rate": 0.00017154289372599233, + "loss": 0.8488, + "step": 891 + }, + { + "epoch": 0.71, + "learning_rate": 0.00017151088348271447, + "loss": 0.7674, + "step": 892 + }, + { + "epoch": 0.71, + "learning_rate": 0.00017147887323943663, + "loss": 0.8455, + "step": 893 + }, + { + "epoch": 0.72, + "learning_rate": 0.00017144686299615879, + "loss": 0.9019, + "step": 894 + }, + { + "epoch": 0.72, + "learning_rate": 0.00017141485275288095, + "loss": 0.8239, + "step": 895 + }, + { + "epoch": 0.72, + "learning_rate": 0.00017138284250960308, + "loss": 0.8681, + "step": 896 + }, + { + "epoch": 0.72, + "learning_rate": 0.0001713508322663252, + "loss": 0.7382, + "step": 897 + }, + { + "epoch": 0.72, + "learning_rate": 0.0001713188220230474, + "loss": 0.8118, + "step": 898 + }, + { + "epoch": 0.72, + "learning_rate": 0.00017128681177976953, + "loss": 0.9801, + "step": 899 + }, + { + "epoch": 0.72, + "learning_rate": 0.0001712548015364917, + "loss": 0.8616, + "step": 900 + }, + { + "epoch": 0.72, + "learning_rate": 0.00017122279129321382, + "loss": 0.8429, + "step": 901 + }, + { + "epoch": 0.72, + "learning_rate": 0.00017119078104993598, + "loss": 0.8818, + "step": 902 + }, + { + "epoch": 0.72, + "learning_rate": 0.00017115877080665814, + "loss": 0.8268, + "step": 903 + }, + { + "epoch": 0.72, + "learning_rate": 0.00017112676056338028, + "loss": 0.9014, + "step": 904 + }, + { + "epoch": 0.72, + "learning_rate": 0.00017109475032010243, + "loss": 0.8034, + "step": 905 + }, + { + "epoch": 0.72, + "learning_rate": 0.0001710627400768246, + "loss": 0.8534, + "step": 906 + }, + { + "epoch": 0.73, + "learning_rate": 0.00017103072983354673, + "loss": 0.8832, + "step": 907 + }, + { + "epoch": 0.73, + "learning_rate": 0.0001709987195902689, + "loss": 0.7666, + "step": 908 + }, + { + "epoch": 0.73, + "learning_rate": 0.00017096670934699105, + "loss": 0.9292, + "step": 909 + }, + { + "epoch": 0.73, + "learning_rate": 0.0001709346991037132, + "loss": 0.8442, + "step": 910 + }, + { + "epoch": 0.73, + "learning_rate": 0.00017090268886043534, + "loss": 0.8922, + "step": 911 + }, + { + "epoch": 0.73, + "learning_rate": 0.0001708706786171575, + "loss": 0.9415, + "step": 912 + }, + { + "epoch": 0.73, + "learning_rate": 0.00017083866837387966, + "loss": 0.727, + "step": 913 + }, + { + "epoch": 0.73, + "learning_rate": 0.0001708066581306018, + "loss": 0.9026, + "step": 914 + }, + { + "epoch": 0.73, + "learning_rate": 0.00017077464788732395, + "loss": 0.8363, + "step": 915 + }, + { + "epoch": 0.73, + "learning_rate": 0.0001707426376440461, + "loss": 0.7485, + "step": 916 + }, + { + "epoch": 0.73, + "learning_rate": 0.00017071062740076827, + "loss": 0.8095, + "step": 917 + }, + { + "epoch": 0.73, + "learning_rate": 0.0001706786171574904, + "loss": 0.8737, + "step": 918 + }, + { + "epoch": 0.74, + "learning_rate": 0.00017064660691421254, + "loss": 0.8117, + "step": 919 + }, + { + "epoch": 0.74, + "learning_rate": 0.00017061459667093472, + "loss": 0.8381, + "step": 920 + }, + { + "epoch": 0.74, + "learning_rate": 0.00017058258642765686, + "loss": 0.8991, + "step": 921 + }, + { + "epoch": 0.74, + "learning_rate": 0.00017055057618437902, + "loss": 0.9253, + "step": 922 + }, + { + "epoch": 0.74, + "learning_rate": 0.00017051856594110115, + "loss": 0.9446, + "step": 923 + }, + { + "epoch": 0.74, + "learning_rate": 0.0001704865556978233, + "loss": 0.7891, + "step": 924 + }, + { + "epoch": 0.74, + "learning_rate": 0.00017045454545454547, + "loss": 0.9345, + "step": 925 + }, + { + "epoch": 0.74, + "learning_rate": 0.0001704225352112676, + "loss": 0.7159, + "step": 926 + }, + { + "epoch": 0.74, + "learning_rate": 0.00017039052496798976, + "loss": 0.8231, + "step": 927 + }, + { + "epoch": 0.74, + "learning_rate": 0.00017035851472471192, + "loss": 0.6187, + "step": 928 + }, + { + "epoch": 0.74, + "learning_rate": 0.00017032650448143405, + "loss": 0.8892, + "step": 929 + }, + { + "epoch": 0.74, + "learning_rate": 0.0001702944942381562, + "loss": 0.8652, + "step": 930 + }, + { + "epoch": 0.74, + "learning_rate": 0.00017026248399487837, + "loss": 0.6449, + "step": 931 + }, + { + "epoch": 0.75, + "learning_rate": 0.00017023047375160053, + "loss": 0.8845, + "step": 932 + }, + { + "epoch": 0.75, + "learning_rate": 0.00017019846350832266, + "loss": 0.7748, + "step": 933 + }, + { + "epoch": 0.75, + "learning_rate": 0.00017016645326504482, + "loss": 0.7577, + "step": 934 + }, + { + "epoch": 0.75, + "learning_rate": 0.00017013444302176698, + "loss": 0.6506, + "step": 935 + }, + { + "epoch": 0.75, + "learning_rate": 0.00017010243277848912, + "loss": 0.8713, + "step": 936 + }, + { + "epoch": 0.75, + "learning_rate": 0.00017007042253521128, + "loss": 1.0087, + "step": 937 + }, + { + "epoch": 0.75, + "learning_rate": 0.00017003841229193344, + "loss": 0.7896, + "step": 938 + }, + { + "epoch": 0.75, + "learning_rate": 0.0001700064020486556, + "loss": 0.8297, + "step": 939 + }, + { + "epoch": 0.75, + "learning_rate": 0.00016997439180537773, + "loss": 0.9284, + "step": 940 + }, + { + "epoch": 0.75, + "learning_rate": 0.00016994238156209986, + "loss": 0.814, + "step": 941 + }, + { + "epoch": 0.75, + "learning_rate": 0.00016991037131882205, + "loss": 0.8792, + "step": 942 + }, + { + "epoch": 0.75, + "learning_rate": 0.00016987836107554418, + "loss": 0.8084, + "step": 943 + }, + { + "epoch": 0.76, + "learning_rate": 0.00016984635083226634, + "loss": 0.8307, + "step": 944 + }, + { + "epoch": 0.76, + "learning_rate": 0.00016981434058898847, + "loss": 0.7677, + "step": 945 + }, + { + "epoch": 0.76, + "learning_rate": 0.00016978233034571063, + "loss": 0.6914, + "step": 946 + }, + { + "epoch": 0.76, + "learning_rate": 0.0001697503201024328, + "loss": 0.9187, + "step": 947 + }, + { + "epoch": 0.76, + "learning_rate": 0.00016971830985915493, + "loss": 0.8614, + "step": 948 + }, + { + "epoch": 0.76, + "learning_rate": 0.00016968629961587709, + "loss": 0.7816, + "step": 949 + }, + { + "epoch": 0.76, + "learning_rate": 0.00016965428937259925, + "loss": 0.6937, + "step": 950 + }, + { + "epoch": 0.76, + "learning_rate": 0.00016962227912932138, + "loss": 0.8123, + "step": 951 + }, + { + "epoch": 0.76, + "learning_rate": 0.00016959026888604354, + "loss": 0.8092, + "step": 952 + }, + { + "epoch": 0.76, + "learning_rate": 0.0001695582586427657, + "loss": 0.8272, + "step": 953 + }, + { + "epoch": 0.76, + "learning_rate": 0.00016952624839948786, + "loss": 0.7629, + "step": 954 + }, + { + "epoch": 0.76, + "learning_rate": 0.00016949423815621, + "loss": 0.8971, + "step": 955 + }, + { + "epoch": 0.76, + "learning_rate": 0.00016946222791293212, + "loss": 0.8397, + "step": 956 + }, + { + "epoch": 0.77, + "learning_rate": 0.0001694302176696543, + "loss": 0.9334, + "step": 957 + }, + { + "epoch": 0.77, + "learning_rate": 0.00016939820742637644, + "loss": 0.8623, + "step": 958 + }, + { + "epoch": 0.77, + "learning_rate": 0.0001693661971830986, + "loss": 0.8331, + "step": 959 + }, + { + "epoch": 0.77, + "learning_rate": 0.00016933418693982076, + "loss": 0.7947, + "step": 960 + }, + { + "epoch": 0.77, + "learning_rate": 0.0001693021766965429, + "loss": 0.8912, + "step": 961 + }, + { + "epoch": 0.77, + "learning_rate": 0.00016927016645326505, + "loss": 0.8452, + "step": 962 + }, + { + "epoch": 0.77, + "learning_rate": 0.0001692381562099872, + "loss": 0.937, + "step": 963 + }, + { + "epoch": 0.77, + "learning_rate": 0.00016920614596670937, + "loss": 0.8295, + "step": 964 + }, + { + "epoch": 0.77, + "learning_rate": 0.0001691741357234315, + "loss": 0.8396, + "step": 965 + }, + { + "epoch": 0.77, + "learning_rate": 0.00016914212548015367, + "loss": 0.8114, + "step": 966 + }, + { + "epoch": 0.77, + "learning_rate": 0.0001691101152368758, + "loss": 0.9572, + "step": 967 + }, + { + "epoch": 0.77, + "learning_rate": 0.00016907810499359796, + "loss": 0.9657, + "step": 968 + }, + { + "epoch": 0.78, + "learning_rate": 0.00016904609475032012, + "loss": 0.8125, + "step": 969 + }, + { + "epoch": 0.78, + "learning_rate": 0.00016901408450704225, + "loss": 0.7858, + "step": 970 + }, + { + "epoch": 0.78, + "learning_rate": 0.0001689820742637644, + "loss": 0.7949, + "step": 971 + }, + { + "epoch": 0.78, + "learning_rate": 0.00016895006402048657, + "loss": 0.9038, + "step": 972 + }, + { + "epoch": 0.78, + "learning_rate": 0.0001689180537772087, + "loss": 0.8575, + "step": 973 + }, + { + "epoch": 0.78, + "learning_rate": 0.00016888604353393086, + "loss": 0.9311, + "step": 974 + }, + { + "epoch": 0.78, + "learning_rate": 0.00016885403329065302, + "loss": 1.0016, + "step": 975 + }, + { + "epoch": 0.78, + "learning_rate": 0.00016882202304737518, + "loss": 0.8346, + "step": 976 + }, + { + "epoch": 0.78, + "learning_rate": 0.00016879001280409732, + "loss": 0.7306, + "step": 977 + }, + { + "epoch": 0.78, + "learning_rate": 0.00016875800256081945, + "loss": 0.8549, + "step": 978 + }, + { + "epoch": 0.78, + "learning_rate": 0.00016872599231754164, + "loss": 0.7818, + "step": 979 + }, + { + "epoch": 0.78, + "learning_rate": 0.00016869398207426377, + "loss": 0.7568, + "step": 980 + }, + { + "epoch": 0.78, + "learning_rate": 0.00016866197183098593, + "loss": 0.8288, + "step": 981 + }, + { + "epoch": 0.79, + "learning_rate": 0.00016862996158770806, + "loss": 0.8381, + "step": 982 + }, + { + "epoch": 0.79, + "learning_rate": 0.00016859795134443022, + "loss": 0.88, + "step": 983 + }, + { + "epoch": 0.79, + "learning_rate": 0.00016856594110115238, + "loss": 0.8837, + "step": 984 + }, + { + "epoch": 0.79, + "learning_rate": 0.0001685339308578745, + "loss": 0.8789, + "step": 985 + }, + { + "epoch": 0.79, + "learning_rate": 0.0001685019206145967, + "loss": 1.0149, + "step": 986 + }, + { + "epoch": 0.79, + "learning_rate": 0.00016846991037131883, + "loss": 0.8407, + "step": 987 + }, + { + "epoch": 0.79, + "learning_rate": 0.00016843790012804097, + "loss": 0.7562, + "step": 988 + }, + { + "epoch": 0.79, + "learning_rate": 0.00016840588988476312, + "loss": 0.8392, + "step": 989 + }, + { + "epoch": 0.79, + "learning_rate": 0.00016837387964148528, + "loss": 0.839, + "step": 990 + }, + { + "epoch": 0.79, + "learning_rate": 0.00016834186939820744, + "loss": 0.8957, + "step": 991 + }, + { + "epoch": 0.79, + "learning_rate": 0.00016830985915492958, + "loss": 0.8003, + "step": 992 + }, + { + "epoch": 0.79, + "learning_rate": 0.00016827784891165174, + "loss": 0.8817, + "step": 993 + }, + { + "epoch": 0.8, + "learning_rate": 0.0001682458386683739, + "loss": 0.7596, + "step": 994 + }, + { + "epoch": 0.8, + "learning_rate": 0.00016821382842509603, + "loss": 0.8686, + "step": 995 + }, + { + "epoch": 0.8, + "learning_rate": 0.0001681818181818182, + "loss": 0.8168, + "step": 996 + }, + { + "epoch": 0.8, + "learning_rate": 0.00016814980793854035, + "loss": 0.8803, + "step": 997 + }, + { + "epoch": 0.8, + "learning_rate": 0.0001681177976952625, + "loss": 0.8371, + "step": 998 + }, + { + "epoch": 0.8, + "learning_rate": 0.00016808578745198464, + "loss": 0.7736, + "step": 999 + }, + { + "epoch": 0.8, + "learning_rate": 0.00016805377720870677, + "loss": 0.8171, + "step": 1000 + }, + { + "epoch": 0.8, + "learning_rate": 0.00016802176696542896, + "loss": 0.9794, + "step": 1001 + }, + { + "epoch": 0.8, + "learning_rate": 0.0001679897567221511, + "loss": 0.8088, + "step": 1002 + }, + { + "epoch": 0.8, + "learning_rate": 0.00016795774647887325, + "loss": 0.7904, + "step": 1003 + }, + { + "epoch": 0.8, + "learning_rate": 0.00016792573623559539, + "loss": 0.6922, + "step": 1004 + }, + { + "epoch": 0.8, + "learning_rate": 0.00016789372599231755, + "loss": 0.894, + "step": 1005 + }, + { + "epoch": 0.8, + "learning_rate": 0.0001678617157490397, + "loss": 0.7121, + "step": 1006 + }, + { + "epoch": 0.81, + "learning_rate": 0.00016782970550576184, + "loss": 0.7592, + "step": 1007 + }, + { + "epoch": 0.81, + "learning_rate": 0.000167797695262484, + "loss": 0.8724, + "step": 1008 + }, + { + "epoch": 0.81, + "learning_rate": 0.00016776568501920616, + "loss": 0.7687, + "step": 1009 + }, + { + "epoch": 0.81, + "learning_rate": 0.0001677336747759283, + "loss": 0.8957, + "step": 1010 + }, + { + "epoch": 0.81, + "learning_rate": 0.00016770166453265045, + "loss": 0.883, + "step": 1011 + }, + { + "epoch": 0.81, + "learning_rate": 0.0001676696542893726, + "loss": 0.7848, + "step": 1012 + }, + { + "epoch": 0.81, + "learning_rate": 0.00016763764404609477, + "loss": 0.9225, + "step": 1013 + }, + { + "epoch": 0.81, + "learning_rate": 0.0001676056338028169, + "loss": 0.8078, + "step": 1014 + }, + { + "epoch": 0.81, + "learning_rate": 0.00016757362355953906, + "loss": 0.8168, + "step": 1015 + }, + { + "epoch": 0.81, + "learning_rate": 0.00016754161331626122, + "loss": 0.7901, + "step": 1016 + }, + { + "epoch": 0.81, + "learning_rate": 0.00016750960307298336, + "loss": 0.8682, + "step": 1017 + }, + { + "epoch": 0.81, + "learning_rate": 0.00016747759282970551, + "loss": 0.973, + "step": 1018 + }, + { + "epoch": 0.82, + "learning_rate": 0.00016744558258642767, + "loss": 0.757, + "step": 1019 + }, + { + "epoch": 0.82, + "learning_rate": 0.0001674135723431498, + "loss": 0.871, + "step": 1020 + }, + { + "epoch": 0.82, + "learning_rate": 0.00016738156209987197, + "loss": 0.7839, + "step": 1021 + }, + { + "epoch": 0.82, + "learning_rate": 0.0001673495518565941, + "loss": 0.8795, + "step": 1022 + }, + { + "epoch": 0.82, + "learning_rate": 0.0001673175416133163, + "loss": 0.9069, + "step": 1023 + }, + { + "epoch": 0.82, + "learning_rate": 0.00016728553137003842, + "loss": 0.8072, + "step": 1024 + }, + { + "epoch": 0.82, + "learning_rate": 0.00016725352112676058, + "loss": 0.7672, + "step": 1025 + }, + { + "epoch": 0.82, + "learning_rate": 0.0001672215108834827, + "loss": 0.8948, + "step": 1026 + }, + { + "epoch": 0.82, + "learning_rate": 0.00016718950064020487, + "loss": 0.9456, + "step": 1027 + }, + { + "epoch": 0.82, + "learning_rate": 0.00016715749039692703, + "loss": 0.7235, + "step": 1028 + }, + { + "epoch": 0.82, + "learning_rate": 0.00016712548015364916, + "loss": 1.0129, + "step": 1029 + }, + { + "epoch": 0.82, + "learning_rate": 0.00016709346991037132, + "loss": 0.8455, + "step": 1030 + }, + { + "epoch": 0.82, + "learning_rate": 0.00016706145966709348, + "loss": 0.8691, + "step": 1031 + }, + { + "epoch": 0.83, + "learning_rate": 0.00016702944942381562, + "loss": 0.8706, + "step": 1032 + }, + { + "epoch": 0.83, + "learning_rate": 0.00016699743918053778, + "loss": 0.9251, + "step": 1033 + }, + { + "epoch": 0.83, + "learning_rate": 0.00016696542893725994, + "loss": 0.739, + "step": 1034 + }, + { + "epoch": 0.83, + "learning_rate": 0.0001669334186939821, + "loss": 0.87, + "step": 1035 + }, + { + "epoch": 0.83, + "learning_rate": 0.00016690140845070423, + "loss": 0.7144, + "step": 1036 + }, + { + "epoch": 0.83, + "learning_rate": 0.0001668693982074264, + "loss": 0.7677, + "step": 1037 + }, + { + "epoch": 0.83, + "learning_rate": 0.00016683738796414855, + "loss": 0.8331, + "step": 1038 + }, + { + "epoch": 0.83, + "learning_rate": 0.00016680537772087068, + "loss": 0.983, + "step": 1039 + }, + { + "epoch": 0.83, + "learning_rate": 0.00016677336747759284, + "loss": 0.7403, + "step": 1040 + }, + { + "epoch": 0.83, + "learning_rate": 0.000166741357234315, + "loss": 0.8796, + "step": 1041 + }, + { + "epoch": 0.83, + "learning_rate": 0.00016670934699103713, + "loss": 0.7391, + "step": 1042 + }, + { + "epoch": 0.83, + "learning_rate": 0.0001666773367477593, + "loss": 0.8915, + "step": 1043 + }, + { + "epoch": 0.84, + "learning_rate": 0.00016664532650448143, + "loss": 0.7894, + "step": 1044 + }, + { + "epoch": 0.84, + "learning_rate": 0.0001666133162612036, + "loss": 0.8159, + "step": 1045 + }, + { + "epoch": 0.84, + "learning_rate": 0.00016658130601792574, + "loss": 0.7515, + "step": 1046 + }, + { + "epoch": 0.84, + "learning_rate": 0.00016654929577464788, + "loss": 0.7361, + "step": 1047 + }, + { + "epoch": 0.84, + "learning_rate": 0.00016651728553137004, + "loss": 0.7795, + "step": 1048 + }, + { + "epoch": 0.84, + "learning_rate": 0.0001664852752880922, + "loss": 0.7818, + "step": 1049 + }, + { + "epoch": 0.84, + "learning_rate": 0.00016645326504481436, + "loss": 0.7256, + "step": 1050 + }, + { + "epoch": 0.84, + "learning_rate": 0.0001664212548015365, + "loss": 0.79, + "step": 1051 + }, + { + "epoch": 0.84, + "learning_rate": 0.00016638924455825865, + "loss": 0.7785, + "step": 1052 + }, + { + "epoch": 0.84, + "learning_rate": 0.0001663572343149808, + "loss": 0.7881, + "step": 1053 + }, + { + "epoch": 0.84, + "learning_rate": 0.00016632522407170294, + "loss": 0.8496, + "step": 1054 + }, + { + "epoch": 0.84, + "learning_rate": 0.0001662932138284251, + "loss": 0.93, + "step": 1055 + }, + { + "epoch": 0.84, + "learning_rate": 0.00016626120358514726, + "loss": 0.9037, + "step": 1056 + }, + { + "epoch": 0.85, + "learning_rate": 0.00016622919334186942, + "loss": 0.8355, + "step": 1057 + }, + { + "epoch": 0.85, + "learning_rate": 0.00016619718309859155, + "loss": 0.7729, + "step": 1058 + }, + { + "epoch": 0.85, + "learning_rate": 0.0001661651728553137, + "loss": 0.845, + "step": 1059 + }, + { + "epoch": 0.85, + "learning_rate": 0.00016613316261203587, + "loss": 0.8216, + "step": 1060 + }, + { + "epoch": 0.85, + "learning_rate": 0.000166101152368758, + "loss": 0.917, + "step": 1061 + }, + { + "epoch": 0.85, + "learning_rate": 0.00016606914212548017, + "loss": 0.8077, + "step": 1062 + }, + { + "epoch": 0.85, + "learning_rate": 0.00016603713188220233, + "loss": 0.8584, + "step": 1063 + }, + { + "epoch": 0.85, + "learning_rate": 0.00016600512163892446, + "loss": 0.765, + "step": 1064 + }, + { + "epoch": 0.85, + "learning_rate": 0.00016597311139564662, + "loss": 0.7088, + "step": 1065 + }, + { + "epoch": 0.85, + "learning_rate": 0.00016594110115236875, + "loss": 0.8962, + "step": 1066 + }, + { + "epoch": 0.85, + "learning_rate": 0.00016590909090909094, + "loss": 0.8149, + "step": 1067 + }, + { + "epoch": 0.85, + "learning_rate": 0.00016587708066581307, + "loss": 0.8342, + "step": 1068 + }, + { + "epoch": 0.86, + "learning_rate": 0.0001658450704225352, + "loss": 0.9165, + "step": 1069 + }, + { + "epoch": 0.86, + "learning_rate": 0.00016581306017925736, + "loss": 0.8714, + "step": 1070 + }, + { + "epoch": 0.86, + "learning_rate": 0.00016578104993597952, + "loss": 0.9016, + "step": 1071 + }, + { + "epoch": 0.86, + "learning_rate": 0.00016574903969270168, + "loss": 0.9178, + "step": 1072 + }, + { + "epoch": 0.86, + "learning_rate": 0.00016571702944942382, + "loss": 0.8045, + "step": 1073 + }, + { + "epoch": 0.86, + "learning_rate": 0.00016568501920614597, + "loss": 0.8108, + "step": 1074 + }, + { + "epoch": 0.86, + "learning_rate": 0.00016565300896286813, + "loss": 0.7894, + "step": 1075 + }, + { + "epoch": 0.86, + "learning_rate": 0.00016562099871959027, + "loss": 0.7714, + "step": 1076 + }, + { + "epoch": 0.86, + "learning_rate": 0.00016558898847631243, + "loss": 0.7615, + "step": 1077 + }, + { + "epoch": 0.86, + "learning_rate": 0.0001655569782330346, + "loss": 0.9111, + "step": 1078 + }, + { + "epoch": 0.86, + "learning_rate": 0.00016552496798975675, + "loss": 0.6991, + "step": 1079 + }, + { + "epoch": 0.86, + "learning_rate": 0.00016549295774647888, + "loss": 0.8983, + "step": 1080 + }, + { + "epoch": 0.86, + "learning_rate": 0.000165460947503201, + "loss": 0.8262, + "step": 1081 + }, + { + "epoch": 0.87, + "learning_rate": 0.0001654289372599232, + "loss": 0.8302, + "step": 1082 + }, + { + "epoch": 0.87, + "learning_rate": 0.00016539692701664533, + "loss": 0.9348, + "step": 1083 + }, + { + "epoch": 0.87, + "learning_rate": 0.0001653649167733675, + "loss": 0.8503, + "step": 1084 + }, + { + "epoch": 0.87, + "learning_rate": 0.00016533290653008962, + "loss": 0.8291, + "step": 1085 + }, + { + "epoch": 0.87, + "learning_rate": 0.00016530089628681178, + "loss": 0.7829, + "step": 1086 + }, + { + "epoch": 0.87, + "learning_rate": 0.00016526888604353394, + "loss": 0.7694, + "step": 1087 + }, + { + "epoch": 0.87, + "learning_rate": 0.00016523687580025608, + "loss": 0.8052, + "step": 1088 + }, + { + "epoch": 0.87, + "learning_rate": 0.00016520486555697826, + "loss": 0.8735, + "step": 1089 + }, + { + "epoch": 0.87, + "learning_rate": 0.0001651728553137004, + "loss": 0.8246, + "step": 1090 + }, + { + "epoch": 0.87, + "learning_rate": 0.00016514084507042253, + "loss": 0.8225, + "step": 1091 + }, + { + "epoch": 0.87, + "learning_rate": 0.0001651088348271447, + "loss": 0.8438, + "step": 1092 + }, + { + "epoch": 0.87, + "learning_rate": 0.00016507682458386685, + "loss": 0.7338, + "step": 1093 + }, + { + "epoch": 0.88, + "learning_rate": 0.000165044814340589, + "loss": 0.8122, + "step": 1094 + }, + { + "epoch": 0.88, + "learning_rate": 0.00016501280409731114, + "loss": 0.9026, + "step": 1095 + }, + { + "epoch": 0.88, + "learning_rate": 0.0001649807938540333, + "loss": 0.7971, + "step": 1096 + }, + { + "epoch": 0.88, + "learning_rate": 0.00016494878361075546, + "loss": 0.7791, + "step": 1097 + }, + { + "epoch": 0.88, + "learning_rate": 0.0001649167733674776, + "loss": 0.8555, + "step": 1098 + }, + { + "epoch": 0.88, + "learning_rate": 0.00016488476312419975, + "loss": 0.7929, + "step": 1099 + }, + { + "epoch": 0.88, + "learning_rate": 0.0001648527528809219, + "loss": 0.7231, + "step": 1100 + }, + { + "epoch": 0.88, + "learning_rate": 0.00016482074263764405, + "loss": 0.7696, + "step": 1101 + }, + { + "epoch": 0.88, + "learning_rate": 0.0001647887323943662, + "loss": 0.8284, + "step": 1102 + }, + { + "epoch": 0.88, + "learning_rate": 0.00016475672215108834, + "loss": 0.8297, + "step": 1103 + }, + { + "epoch": 0.88, + "learning_rate": 0.00016472471190781052, + "loss": 0.7617, + "step": 1104 + }, + { + "epoch": 0.88, + "learning_rate": 0.00016469270166453266, + "loss": 0.8579, + "step": 1105 + }, + { + "epoch": 0.88, + "learning_rate": 0.00016466069142125482, + "loss": 0.8955, + "step": 1106 + }, + { + "epoch": 0.89, + "learning_rate": 0.00016462868117797695, + "loss": 0.8237, + "step": 1107 + }, + { + "epoch": 0.89, + "learning_rate": 0.0001645966709346991, + "loss": 0.8213, + "step": 1108 + }, + { + "epoch": 0.89, + "learning_rate": 0.00016456466069142127, + "loss": 0.81, + "step": 1109 + }, + { + "epoch": 0.89, + "learning_rate": 0.0001645326504481434, + "loss": 1.0071, + "step": 1110 + }, + { + "epoch": 0.89, + "learning_rate": 0.00016450064020486556, + "loss": 0.8321, + "step": 1111 + }, + { + "epoch": 0.89, + "learning_rate": 0.00016446862996158772, + "loss": 0.7857, + "step": 1112 + }, + { + "epoch": 0.89, + "learning_rate": 0.00016443661971830985, + "loss": 0.8592, + "step": 1113 + }, + { + "epoch": 0.89, + "learning_rate": 0.00016440460947503201, + "loss": 0.8426, + "step": 1114 + }, + { + "epoch": 0.89, + "learning_rate": 0.00016437259923175417, + "loss": 0.688, + "step": 1115 + }, + { + "epoch": 0.89, + "learning_rate": 0.00016434058898847633, + "loss": 0.7351, + "step": 1116 + }, + { + "epoch": 0.89, + "learning_rate": 0.00016430857874519847, + "loss": 0.8982, + "step": 1117 + }, + { + "epoch": 0.89, + "learning_rate": 0.00016427656850192063, + "loss": 0.7535, + "step": 1118 + }, + { + "epoch": 0.9, + "learning_rate": 0.00016424455825864279, + "loss": 0.8489, + "step": 1119 + }, + { + "epoch": 0.9, + "learning_rate": 0.00016421254801536492, + "loss": 0.9313, + "step": 1120 + }, + { + "epoch": 0.9, + "learning_rate": 0.00016418053777208708, + "loss": 0.7996, + "step": 1121 + }, + { + "epoch": 0.9, + "learning_rate": 0.00016414852752880924, + "loss": 0.8349, + "step": 1122 + }, + { + "epoch": 0.9, + "learning_rate": 0.00016411651728553137, + "loss": 0.8766, + "step": 1123 + }, + { + "epoch": 0.9, + "learning_rate": 0.00016408450704225353, + "loss": 0.7425, + "step": 1124 + }, + { + "epoch": 0.9, + "learning_rate": 0.00016405249679897566, + "loss": 0.8088, + "step": 1125 + }, + { + "epoch": 0.9, + "learning_rate": 0.00016402048655569785, + "loss": 0.7724, + "step": 1126 + }, + { + "epoch": 0.9, + "learning_rate": 0.00016398847631241998, + "loss": 0.7568, + "step": 1127 + }, + { + "epoch": 0.9, + "learning_rate": 0.00016395646606914212, + "loss": 0.8736, + "step": 1128 + }, + { + "epoch": 0.9, + "learning_rate": 0.00016392445582586428, + "loss": 0.852, + "step": 1129 + }, + { + "epoch": 0.9, + "learning_rate": 0.00016389244558258643, + "loss": 0.8514, + "step": 1130 + }, + { + "epoch": 0.9, + "learning_rate": 0.0001638604353393086, + "loss": 0.8352, + "step": 1131 + }, + { + "epoch": 0.91, + "learning_rate": 0.00016382842509603073, + "loss": 0.7726, + "step": 1132 + }, + { + "epoch": 0.91, + "learning_rate": 0.0001637964148527529, + "loss": 0.8638, + "step": 1133 + }, + { + "epoch": 0.91, + "learning_rate": 0.00016376440460947505, + "loss": 0.7749, + "step": 1134 + }, + { + "epoch": 0.91, + "learning_rate": 0.00016373239436619718, + "loss": 0.8643, + "step": 1135 + }, + { + "epoch": 0.91, + "learning_rate": 0.00016370038412291934, + "loss": 0.7193, + "step": 1136 + }, + { + "epoch": 0.91, + "learning_rate": 0.0001636683738796415, + "loss": 0.7939, + "step": 1137 + }, + { + "epoch": 0.91, + "learning_rate": 0.00016363636363636366, + "loss": 0.7662, + "step": 1138 + }, + { + "epoch": 0.91, + "learning_rate": 0.0001636043533930858, + "loss": 0.7581, + "step": 1139 + }, + { + "epoch": 0.91, + "learning_rate": 0.00016357234314980795, + "loss": 0.8214, + "step": 1140 + }, + { + "epoch": 0.91, + "learning_rate": 0.0001635403329065301, + "loss": 0.797, + "step": 1141 + }, + { + "epoch": 0.91, + "learning_rate": 0.00016350832266325224, + "loss": 0.8902, + "step": 1142 + }, + { + "epoch": 0.91, + "learning_rate": 0.0001634763124199744, + "loss": 0.9522, + "step": 1143 + }, + { + "epoch": 0.92, + "learning_rate": 0.00016344430217669656, + "loss": 0.7986, + "step": 1144 + }, + { + "epoch": 0.92, + "learning_rate": 0.0001634122919334187, + "loss": 0.813, + "step": 1145 + }, + { + "epoch": 0.92, + "learning_rate": 0.00016338028169014086, + "loss": 0.7778, + "step": 1146 + }, + { + "epoch": 0.92, + "learning_rate": 0.000163348271446863, + "loss": 0.7929, + "step": 1147 + }, + { + "epoch": 0.92, + "learning_rate": 0.00016331626120358518, + "loss": 0.8318, + "step": 1148 + }, + { + "epoch": 0.92, + "learning_rate": 0.0001632842509603073, + "loss": 0.8964, + "step": 1149 + }, + { + "epoch": 0.92, + "learning_rate": 0.00016325224071702944, + "loss": 0.8236, + "step": 1150 + }, + { + "epoch": 0.92, + "learning_rate": 0.0001632202304737516, + "loss": 0.8395, + "step": 1151 + }, + { + "epoch": 0.92, + "learning_rate": 0.00016318822023047376, + "loss": 0.7612, + "step": 1152 + }, + { + "epoch": 0.92, + "learning_rate": 0.00016315620998719592, + "loss": 0.8671, + "step": 1153 + }, + { + "epoch": 0.92, + "learning_rate": 0.00016312419974391805, + "loss": 0.8454, + "step": 1154 + }, + { + "epoch": 0.92, + "learning_rate": 0.0001630921895006402, + "loss": 0.837, + "step": 1155 + }, + { + "epoch": 0.92, + "learning_rate": 0.00016306017925736237, + "loss": 0.9328, + "step": 1156 + }, + { + "epoch": 0.93, + "learning_rate": 0.0001630281690140845, + "loss": 0.7665, + "step": 1157 + }, + { + "epoch": 0.93, + "learning_rate": 0.00016299615877080666, + "loss": 0.8216, + "step": 1158 + }, + { + "epoch": 0.93, + "learning_rate": 0.00016296414852752882, + "loss": 0.8768, + "step": 1159 + }, + { + "epoch": 0.93, + "learning_rate": 0.00016293213828425098, + "loss": 0.8369, + "step": 1160 + }, + { + "epoch": 0.93, + "learning_rate": 0.00016290012804097312, + "loss": 0.8003, + "step": 1161 + }, + { + "epoch": 0.93, + "learning_rate": 0.00016286811779769525, + "loss": 0.8642, + "step": 1162 + }, + { + "epoch": 0.93, + "learning_rate": 0.00016283610755441744, + "loss": 0.9401, + "step": 1163 + }, + { + "epoch": 0.93, + "learning_rate": 0.00016280409731113957, + "loss": 0.8097, + "step": 1164 + }, + { + "epoch": 0.93, + "learning_rate": 0.00016277208706786173, + "loss": 0.9433, + "step": 1165 + }, + { + "epoch": 0.93, + "learning_rate": 0.0001627400768245839, + "loss": 0.8477, + "step": 1166 + }, + { + "epoch": 0.93, + "learning_rate": 0.00016270806658130602, + "loss": 0.8546, + "step": 1167 + }, + { + "epoch": 0.93, + "learning_rate": 0.00016267605633802818, + "loss": 0.7654, + "step": 1168 + }, + { + "epoch": 0.94, + "learning_rate": 0.00016264404609475031, + "loss": 0.8259, + "step": 1169 + }, + { + "epoch": 0.94, + "learning_rate": 0.0001626120358514725, + "loss": 0.8793, + "step": 1170 + }, + { + "epoch": 0.94, + "learning_rate": 0.00016258002560819463, + "loss": 0.8391, + "step": 1171 + }, + { + "epoch": 0.94, + "learning_rate": 0.00016254801536491677, + "loss": 0.8865, + "step": 1172 + }, + { + "epoch": 0.94, + "learning_rate": 0.00016251600512163893, + "loss": 0.7563, + "step": 1173 + }, + { + "epoch": 0.94, + "learning_rate": 0.00016248399487836109, + "loss": 0.8389, + "step": 1174 + }, + { + "epoch": 0.94, + "learning_rate": 0.00016245198463508325, + "loss": 0.7511, + "step": 1175 + }, + { + "epoch": 0.94, + "learning_rate": 0.00016241997439180538, + "loss": 0.8317, + "step": 1176 + }, + { + "epoch": 0.94, + "learning_rate": 0.00016238796414852754, + "loss": 0.7243, + "step": 1177 + }, + { + "epoch": 0.94, + "learning_rate": 0.0001623559539052497, + "loss": 0.8274, + "step": 1178 + }, + { + "epoch": 0.94, + "learning_rate": 0.00016232394366197183, + "loss": 0.8048, + "step": 1179 + }, + { + "epoch": 0.94, + "learning_rate": 0.000162291933418694, + "loss": 0.8419, + "step": 1180 + }, + { + "epoch": 0.94, + "learning_rate": 0.00016225992317541615, + "loss": 0.8228, + "step": 1181 + }, + { + "epoch": 0.95, + "learning_rate": 0.00016222791293213828, + "loss": 0.7862, + "step": 1182 + }, + { + "epoch": 0.95, + "learning_rate": 0.00016219590268886044, + "loss": 0.78, + "step": 1183 + }, + { + "epoch": 0.95, + "learning_rate": 0.00016216389244558258, + "loss": 0.9222, + "step": 1184 + }, + { + "epoch": 0.95, + "learning_rate": 0.00016213188220230476, + "loss": 1.0484, + "step": 1185 + }, + { + "epoch": 0.95, + "learning_rate": 0.0001620998719590269, + "loss": 0.8025, + "step": 1186 + }, + { + "epoch": 0.95, + "learning_rate": 0.00016206786171574903, + "loss": 0.808, + "step": 1187 + }, + { + "epoch": 0.95, + "learning_rate": 0.0001620358514724712, + "loss": 0.8624, + "step": 1188 + }, + { + "epoch": 0.95, + "learning_rate": 0.00016200384122919335, + "loss": 0.8888, + "step": 1189 + }, + { + "epoch": 0.95, + "learning_rate": 0.0001619718309859155, + "loss": 0.8384, + "step": 1190 + }, + { + "epoch": 0.95, + "learning_rate": 0.00016193982074263764, + "loss": 0.789, + "step": 1191 + }, + { + "epoch": 0.95, + "learning_rate": 0.00016190781049935983, + "loss": 0.841, + "step": 1192 + }, + { + "epoch": 0.95, + "learning_rate": 0.00016187580025608196, + "loss": 0.895, + "step": 1193 + }, + { + "epoch": 0.96, + "learning_rate": 0.0001618437900128041, + "loss": 0.8373, + "step": 1194 + }, + { + "epoch": 0.96, + "learning_rate": 0.00016181177976952625, + "loss": 0.923, + "step": 1195 + }, + { + "epoch": 0.96, + "learning_rate": 0.0001617797695262484, + "loss": 0.936, + "step": 1196 + }, + { + "epoch": 0.96, + "learning_rate": 0.00016174775928297057, + "loss": 0.8757, + "step": 1197 + }, + { + "epoch": 0.96, + "learning_rate": 0.0001617157490396927, + "loss": 0.8439, + "step": 1198 + }, + { + "epoch": 0.96, + "learning_rate": 0.00016168373879641486, + "loss": 0.8421, + "step": 1199 + }, + { + "epoch": 0.96, + "learning_rate": 0.00016165172855313702, + "loss": 0.8203, + "step": 1200 + }, + { + "epoch": 0.96, + "learning_rate": 0.00016161971830985916, + "loss": 0.9084, + "step": 1201 + }, + { + "epoch": 0.96, + "learning_rate": 0.00016158770806658132, + "loss": 0.8856, + "step": 1202 + }, + { + "epoch": 0.96, + "learning_rate": 0.00016155569782330348, + "loss": 0.8032, + "step": 1203 + }, + { + "epoch": 0.96, + "learning_rate": 0.0001615236875800256, + "loss": 0.7867, + "step": 1204 + }, + { + "epoch": 0.96, + "learning_rate": 0.00016149167733674777, + "loss": 0.8778, + "step": 1205 + }, + { + "epoch": 0.96, + "learning_rate": 0.0001614596670934699, + "loss": 0.7473, + "step": 1206 + }, + { + "epoch": 0.97, + "learning_rate": 0.0001614276568501921, + "loss": 0.7665, + "step": 1207 + }, + { + "epoch": 0.97, + "learning_rate": 0.00016139564660691422, + "loss": 0.8323, + "step": 1208 + }, + { + "epoch": 0.97, + "learning_rate": 0.00016136363636363635, + "loss": 0.9359, + "step": 1209 + }, + { + "epoch": 0.97, + "learning_rate": 0.0001613316261203585, + "loss": 0.9023, + "step": 1210 + }, + { + "epoch": 0.97, + "learning_rate": 0.00016129961587708067, + "loss": 0.6753, + "step": 1211 + }, + { + "epoch": 0.97, + "learning_rate": 0.00016126760563380283, + "loss": 0.9202, + "step": 1212 + }, + { + "epoch": 0.97, + "learning_rate": 0.00016123559539052497, + "loss": 0.7696, + "step": 1213 + }, + { + "epoch": 0.97, + "learning_rate": 0.00016120358514724713, + "loss": 0.8322, + "step": 1214 + }, + { + "epoch": 0.97, + "learning_rate": 0.00016117157490396928, + "loss": 0.7525, + "step": 1215 + }, + { + "epoch": 0.97, + "learning_rate": 0.00016113956466069142, + "loss": 0.7661, + "step": 1216 + }, + { + "epoch": 0.97, + "learning_rate": 0.00016110755441741358, + "loss": 0.7832, + "step": 1217 + }, + { + "epoch": 0.97, + "learning_rate": 0.00016107554417413574, + "loss": 0.8472, + "step": 1218 + }, + { + "epoch": 0.98, + "learning_rate": 0.0001610435339308579, + "loss": 0.8848, + "step": 1219 + }, + { + "epoch": 0.98, + "learning_rate": 0.00016101152368758003, + "loss": 0.7783, + "step": 1220 + }, + { + "epoch": 0.98, + "learning_rate": 0.0001609795134443022, + "loss": 0.8594, + "step": 1221 + }, + { + "epoch": 0.98, + "learning_rate": 0.00016094750320102435, + "loss": 0.8967, + "step": 1222 + }, + { + "epoch": 0.98, + "learning_rate": 0.00016091549295774648, + "loss": 0.8321, + "step": 1223 + }, + { + "epoch": 0.98, + "learning_rate": 0.00016088348271446864, + "loss": 0.7883, + "step": 1224 + }, + { + "epoch": 0.98, + "learning_rate": 0.0001608514724711908, + "loss": 0.8831, + "step": 1225 + }, + { + "epoch": 0.98, + "learning_rate": 0.00016081946222791293, + "loss": 0.8175, + "step": 1226 + }, + { + "epoch": 0.98, + "learning_rate": 0.0001607874519846351, + "loss": 0.7739, + "step": 1227 + }, + { + "epoch": 0.98, + "learning_rate": 0.00016075544174135723, + "loss": 0.7986, + "step": 1228 + }, + { + "epoch": 0.98, + "learning_rate": 0.0001607234314980794, + "loss": 0.7464, + "step": 1229 + }, + { + "epoch": 0.98, + "learning_rate": 0.00016069142125480155, + "loss": 0.7166, + "step": 1230 + }, + { + "epoch": 0.98, + "learning_rate": 0.00016065941101152368, + "loss": 0.8422, + "step": 1231 + }, + { + "epoch": 0.99, + "learning_rate": 0.00016062740076824584, + "loss": 0.7834, + "step": 1232 + }, + { + "epoch": 0.99, + "learning_rate": 0.000160595390524968, + "loss": 0.7863, + "step": 1233 + }, + { + "epoch": 0.99, + "learning_rate": 0.00016056338028169016, + "loss": 0.8836, + "step": 1234 + }, + { + "epoch": 0.99, + "learning_rate": 0.0001605313700384123, + "loss": 0.8128, + "step": 1235 + }, + { + "epoch": 0.99, + "learning_rate": 0.00016049935979513445, + "loss": 0.7955, + "step": 1236 + }, + { + "epoch": 0.99, + "learning_rate": 0.0001604673495518566, + "loss": 0.9122, + "step": 1237 + }, + { + "epoch": 0.99, + "learning_rate": 0.00016043533930857874, + "loss": 0.7628, + "step": 1238 + }, + { + "epoch": 0.99, + "learning_rate": 0.0001604033290653009, + "loss": 0.7983, + "step": 1239 + }, + { + "epoch": 0.99, + "learning_rate": 0.00016037131882202306, + "loss": 0.7842, + "step": 1240 + }, + { + "epoch": 0.99, + "learning_rate": 0.0001603393085787452, + "loss": 0.8116, + "step": 1241 + }, + { + "epoch": 0.99, + "learning_rate": 0.00016030729833546736, + "loss": 0.9165, + "step": 1242 + }, + { + "epoch": 0.99, + "learning_rate": 0.00016027528809218951, + "loss": 0.7712, + "step": 1243 + }, + { + "epoch": 1.0, + "learning_rate": 0.00016024327784891167, + "loss": 0.8211, + "step": 1244 + }, + { + "epoch": 1.0, + "learning_rate": 0.0001602112676056338, + "loss": 0.8548, + "step": 1245 + }, + { + "epoch": 1.0, + "learning_rate": 0.00016017925736235597, + "loss": 0.7731, + "step": 1246 + }, + { + "epoch": 1.0, + "learning_rate": 0.00016014724711907813, + "loss": 0.7631, + "step": 1247 + }, + { + "epoch": 1.0, + "learning_rate": 0.00016011523687580026, + "loss": 0.9419, + "step": 1248 + }, + { + "epoch": 1.0, + "learning_rate": 0.00016008322663252242, + "loss": 0.6864, + "step": 1249 + }, + { + "epoch": 1.0, + "learning_rate": 0.00016005121638924455, + "loss": 0.8766, + "step": 1250 + }, + { + "epoch": 1.0, + "learning_rate": 0.00016001920614596674, + "loss": 0.7305, + "step": 1251 + }, + { + "epoch": 1.0, + "learning_rate": 0.00015998719590268887, + "loss": 0.821, + "step": 1252 + }, + { + "epoch": 1.0, + "learning_rate": 0.000159955185659411, + "loss": 0.7628, + "step": 1253 + }, + { + "epoch": 1.0, + "learning_rate": 0.00015992317541613316, + "loss": 0.6833, + "step": 1254 + }, + { + "epoch": 1.0, + "learning_rate": 0.00015989116517285532, + "loss": 0.8862, + "step": 1255 + }, + { + "epoch": 1.0, + "learning_rate": 0.00015985915492957748, + "loss": 0.7829, + "step": 1256 + }, + { + "epoch": 1.01, + "learning_rate": 0.00015982714468629962, + "loss": 0.8492, + "step": 1257 + }, + { + "epoch": 1.01, + "learning_rate": 0.00015979513444302178, + "loss": 0.8174, + "step": 1258 + }, + { + "epoch": 1.01, + "learning_rate": 0.00015976312419974394, + "loss": 0.7324, + "step": 1259 + }, + { + "epoch": 1.01, + "learning_rate": 0.00015973111395646607, + "loss": 0.8772, + "step": 1260 + }, + { + "epoch": 1.01, + "learning_rate": 0.00015969910371318823, + "loss": 0.843, + "step": 1261 + }, + { + "epoch": 1.01, + "learning_rate": 0.0001596670934699104, + "loss": 0.793, + "step": 1262 + }, + { + "epoch": 1.01, + "learning_rate": 0.00015963508322663252, + "loss": 0.7718, + "step": 1263 + }, + { + "epoch": 1.01, + "learning_rate": 0.00015960307298335468, + "loss": 0.9135, + "step": 1264 + }, + { + "epoch": 1.01, + "learning_rate": 0.0001595710627400768, + "loss": 0.7182, + "step": 1265 + }, + { + "epoch": 1.01, + "learning_rate": 0.000159539052496799, + "loss": 0.7075, + "step": 1266 + }, + { + "epoch": 1.01, + "learning_rate": 0.00015950704225352113, + "loss": 0.7862, + "step": 1267 + }, + { + "epoch": 1.01, + "learning_rate": 0.00015947503201024327, + "loss": 0.8643, + "step": 1268 + }, + { + "epoch": 1.02, + "learning_rate": 0.00015944302176696545, + "loss": 0.8241, + "step": 1269 + }, + { + "epoch": 1.02, + "learning_rate": 0.00015941101152368759, + "loss": 0.7758, + "step": 1270 + }, + { + "epoch": 1.02, + "learning_rate": 0.00015937900128040974, + "loss": 0.7544, + "step": 1271 + }, + { + "epoch": 1.02, + "learning_rate": 0.00015934699103713188, + "loss": 0.8678, + "step": 1272 + }, + { + "epoch": 1.02, + "learning_rate": 0.00015931498079385406, + "loss": 0.7558, + "step": 1273 + }, + { + "epoch": 1.02, + "learning_rate": 0.0001592829705505762, + "loss": 0.7393, + "step": 1274 + }, + { + "epoch": 1.02, + "learning_rate": 0.00015925096030729833, + "loss": 1.0046, + "step": 1275 + }, + { + "epoch": 1.02, + "learning_rate": 0.0001592189500640205, + "loss": 0.6813, + "step": 1276 + }, + { + "epoch": 1.02, + "learning_rate": 0.00015918693982074265, + "loss": 0.9063, + "step": 1277 + }, + { + "epoch": 1.02, + "learning_rate": 0.0001591549295774648, + "loss": 0.773, + "step": 1278 + }, + { + "epoch": 1.02, + "learning_rate": 0.00015912291933418694, + "loss": 0.8058, + "step": 1279 + }, + { + "epoch": 1.02, + "learning_rate": 0.0001590909090909091, + "loss": 0.8138, + "step": 1280 + }, + { + "epoch": 1.02, + "learning_rate": 0.00015905889884763126, + "loss": 0.8457, + "step": 1281 + }, + { + "epoch": 1.03, + "learning_rate": 0.0001590268886043534, + "loss": 0.7412, + "step": 1282 + }, + { + "epoch": 1.03, + "learning_rate": 0.00015899487836107555, + "loss": 0.7379, + "step": 1283 + }, + { + "epoch": 1.03, + "learning_rate": 0.00015896286811779771, + "loss": 0.79, + "step": 1284 + }, + { + "epoch": 1.03, + "learning_rate": 0.00015893085787451985, + "loss": 0.7496, + "step": 1285 + }, + { + "epoch": 1.03, + "learning_rate": 0.000158898847631242, + "loss": 0.7759, + "step": 1286 + }, + { + "epoch": 1.03, + "learning_rate": 0.00015886683738796414, + "loss": 0.7461, + "step": 1287 + }, + { + "epoch": 1.03, + "learning_rate": 0.00015883482714468633, + "loss": 0.8152, + "step": 1288 + }, + { + "epoch": 1.03, + "learning_rate": 0.00015880281690140846, + "loss": 0.6619, + "step": 1289 + }, + { + "epoch": 1.03, + "learning_rate": 0.0001587708066581306, + "loss": 0.6811, + "step": 1290 + }, + { + "epoch": 1.03, + "learning_rate": 0.00015873879641485275, + "loss": 0.7638, + "step": 1291 + }, + { + "epoch": 1.03, + "learning_rate": 0.0001587067861715749, + "loss": 0.8279, + "step": 1292 + }, + { + "epoch": 1.03, + "learning_rate": 0.00015867477592829707, + "loss": 0.7919, + "step": 1293 + }, + { + "epoch": 1.04, + "learning_rate": 0.0001586427656850192, + "loss": 0.8066, + "step": 1294 + }, + { + "epoch": 1.04, + "learning_rate": 0.00015861075544174136, + "loss": 0.7825, + "step": 1295 + }, + { + "epoch": 1.04, + "learning_rate": 0.00015857874519846352, + "loss": 0.8071, + "step": 1296 + }, + { + "epoch": 1.04, + "learning_rate": 0.00015854673495518566, + "loss": 0.7476, + "step": 1297 + }, + { + "epoch": 1.04, + "learning_rate": 0.00015851472471190782, + "loss": 0.8105, + "step": 1298 + }, + { + "epoch": 1.04, + "learning_rate": 0.00015848271446862997, + "loss": 0.8574, + "step": 1299 + }, + { + "epoch": 1.04, + "learning_rate": 0.00015845070422535213, + "loss": 0.6954, + "step": 1300 + }, + { + "epoch": 1.04, + "learning_rate": 0.00015841869398207427, + "loss": 0.6404, + "step": 1301 + }, + { + "epoch": 1.04, + "learning_rate": 0.00015838668373879643, + "loss": 0.8067, + "step": 1302 + }, + { + "epoch": 1.04, + "learning_rate": 0.0001583546734955186, + "loss": 0.7663, + "step": 1303 + }, + { + "epoch": 1.04, + "learning_rate": 0.00015832266325224072, + "loss": 0.7391, + "step": 1304 + }, + { + "epoch": 1.04, + "learning_rate": 0.00015829065300896288, + "loss": 0.7501, + "step": 1305 + }, + { + "epoch": 1.04, + "learning_rate": 0.00015825864276568504, + "loss": 0.73, + "step": 1306 + }, + { + "epoch": 1.05, + "learning_rate": 0.00015822663252240717, + "loss": 0.8249, + "step": 1307 + }, + { + "epoch": 1.05, + "learning_rate": 0.00015819462227912933, + "loss": 0.7858, + "step": 1308 + }, + { + "epoch": 1.05, + "learning_rate": 0.00015816261203585146, + "loss": 0.7624, + "step": 1309 + }, + { + "epoch": 1.05, + "learning_rate": 0.00015813060179257365, + "loss": 0.7692, + "step": 1310 + }, + { + "epoch": 1.05, + "learning_rate": 0.00015809859154929578, + "loss": 0.7569, + "step": 1311 + }, + { + "epoch": 1.05, + "learning_rate": 0.00015806658130601792, + "loss": 0.8599, + "step": 1312 + }, + { + "epoch": 1.05, + "learning_rate": 0.00015803457106274008, + "loss": 0.78, + "step": 1313 + }, + { + "epoch": 1.05, + "learning_rate": 0.00015800256081946224, + "loss": 0.7294, + "step": 1314 + }, + { + "epoch": 1.05, + "learning_rate": 0.0001579705505761844, + "loss": 0.7562, + "step": 1315 + }, + { + "epoch": 1.05, + "learning_rate": 0.00015793854033290653, + "loss": 0.7559, + "step": 1316 + }, + { + "epoch": 1.05, + "learning_rate": 0.0001579065300896287, + "loss": 0.8064, + "step": 1317 + }, + { + "epoch": 1.05, + "learning_rate": 0.00015787451984635085, + "loss": 0.8606, + "step": 1318 + }, + { + "epoch": 1.06, + "learning_rate": 0.00015784250960307298, + "loss": 0.8431, + "step": 1319 + }, + { + "epoch": 1.06, + "learning_rate": 0.00015781049935979514, + "loss": 0.8695, + "step": 1320 + }, + { + "epoch": 1.06, + "learning_rate": 0.0001577784891165173, + "loss": 0.7834, + "step": 1321 + }, + { + "epoch": 1.06, + "learning_rate": 0.00015774647887323943, + "loss": 0.8134, + "step": 1322 + }, + { + "epoch": 1.06, + "learning_rate": 0.0001577144686299616, + "loss": 0.7756, + "step": 1323 + }, + { + "epoch": 1.06, + "learning_rate": 0.00015768245838668375, + "loss": 0.8375, + "step": 1324 + }, + { + "epoch": 1.06, + "learning_rate": 0.0001576504481434059, + "loss": 0.8352, + "step": 1325 + }, + { + "epoch": 1.06, + "learning_rate": 0.00015761843790012805, + "loss": 0.8797, + "step": 1326 + }, + { + "epoch": 1.06, + "learning_rate": 0.0001575864276568502, + "loss": 0.8347, + "step": 1327 + }, + { + "epoch": 1.06, + "learning_rate": 0.00015755441741357236, + "loss": 0.8626, + "step": 1328 + }, + { + "epoch": 1.06, + "learning_rate": 0.0001575224071702945, + "loss": 0.7528, + "step": 1329 + }, + { + "epoch": 1.06, + "learning_rate": 0.00015749039692701666, + "loss": 0.807, + "step": 1330 + }, + { + "epoch": 1.06, + "learning_rate": 0.0001574583866837388, + "loss": 0.7309, + "step": 1331 + }, + { + "epoch": 1.07, + "learning_rate": 0.00015742637644046098, + "loss": 0.7916, + "step": 1332 + }, + { + "epoch": 1.07, + "learning_rate": 0.0001573943661971831, + "loss": 0.7863, + "step": 1333 + }, + { + "epoch": 1.07, + "learning_rate": 0.00015736235595390524, + "loss": 0.7182, + "step": 1334 + }, + { + "epoch": 1.07, + "learning_rate": 0.0001573303457106274, + "loss": 0.7021, + "step": 1335 + }, + { + "epoch": 1.07, + "learning_rate": 0.00015729833546734956, + "loss": 0.8308, + "step": 1336 + }, + { + "epoch": 1.07, + "learning_rate": 0.00015726632522407172, + "loss": 0.8533, + "step": 1337 + }, + { + "epoch": 1.07, + "learning_rate": 0.00015723431498079385, + "loss": 0.7562, + "step": 1338 + }, + { + "epoch": 1.07, + "learning_rate": 0.00015720230473751601, + "loss": 0.7798, + "step": 1339 + }, + { + "epoch": 1.07, + "learning_rate": 0.00015717029449423817, + "loss": 0.7456, + "step": 1340 + }, + { + "epoch": 1.07, + "learning_rate": 0.0001571382842509603, + "loss": 0.7887, + "step": 1341 + }, + { + "epoch": 1.07, + "learning_rate": 0.00015710627400768247, + "loss": 0.8712, + "step": 1342 + }, + { + "epoch": 1.07, + "learning_rate": 0.00015707426376440463, + "loss": 0.7809, + "step": 1343 + }, + { + "epoch": 1.08, + "learning_rate": 0.00015704225352112676, + "loss": 0.7527, + "step": 1344 + }, + { + "epoch": 1.08, + "learning_rate": 0.00015701024327784892, + "loss": 0.8252, + "step": 1345 + }, + { + "epoch": 1.08, + "learning_rate": 0.00015697823303457108, + "loss": 0.7615, + "step": 1346 + }, + { + "epoch": 1.08, + "learning_rate": 0.00015694622279129324, + "loss": 0.7895, + "step": 1347 + }, + { + "epoch": 1.08, + "learning_rate": 0.00015691421254801537, + "loss": 0.7395, + "step": 1348 + }, + { + "epoch": 1.08, + "learning_rate": 0.0001568822023047375, + "loss": 0.7583, + "step": 1349 + }, + { + "epoch": 1.08, + "learning_rate": 0.0001568501920614597, + "loss": 0.7255, + "step": 1350 + }, + { + "epoch": 1.08, + "learning_rate": 0.00015681818181818182, + "loss": 0.7934, + "step": 1351 + }, + { + "epoch": 1.08, + "learning_rate": 0.00015678617157490398, + "loss": 0.8444, + "step": 1352 + }, + { + "epoch": 1.08, + "learning_rate": 0.00015675416133162612, + "loss": 0.7627, + "step": 1353 + }, + { + "epoch": 1.08, + "learning_rate": 0.00015672215108834828, + "loss": 0.6533, + "step": 1354 + }, + { + "epoch": 1.08, + "learning_rate": 0.00015669014084507043, + "loss": 0.8006, + "step": 1355 + }, + { + "epoch": 1.08, + "learning_rate": 0.00015665813060179257, + "loss": 0.7667, + "step": 1356 + }, + { + "epoch": 1.09, + "learning_rate": 0.00015662612035851473, + "loss": 0.7063, + "step": 1357 + }, + { + "epoch": 1.09, + "learning_rate": 0.0001565941101152369, + "loss": 0.8588, + "step": 1358 + }, + { + "epoch": 1.09, + "learning_rate": 0.00015656209987195905, + "loss": 0.8057, + "step": 1359 + }, + { + "epoch": 1.09, + "learning_rate": 0.00015653008962868118, + "loss": 0.8413, + "step": 1360 + }, + { + "epoch": 1.09, + "learning_rate": 0.00015649807938540334, + "loss": 0.7312, + "step": 1361 + }, + { + "epoch": 1.09, + "learning_rate": 0.0001564660691421255, + "loss": 0.7998, + "step": 1362 + }, + { + "epoch": 1.09, + "learning_rate": 0.00015643405889884763, + "loss": 0.9268, + "step": 1363 + }, + { + "epoch": 1.09, + "learning_rate": 0.0001564020486555698, + "loss": 0.9466, + "step": 1364 + }, + { + "epoch": 1.09, + "learning_rate": 0.00015637003841229195, + "loss": 0.7932, + "step": 1365 + }, + { + "epoch": 1.09, + "learning_rate": 0.00015633802816901408, + "loss": 0.744, + "step": 1366 + }, + { + "epoch": 1.09, + "learning_rate": 0.00015630601792573624, + "loss": 0.7963, + "step": 1367 + }, + { + "epoch": 1.09, + "learning_rate": 0.00015627400768245838, + "loss": 0.799, + "step": 1368 + }, + { + "epoch": 1.1, + "learning_rate": 0.00015624199743918056, + "loss": 0.7829, + "step": 1369 + }, + { + "epoch": 1.1, + "learning_rate": 0.0001562099871959027, + "loss": 0.7367, + "step": 1370 + }, + { + "epoch": 1.1, + "learning_rate": 0.00015617797695262483, + "loss": 0.7996, + "step": 1371 + }, + { + "epoch": 1.1, + "learning_rate": 0.00015614596670934702, + "loss": 0.7828, + "step": 1372 + }, + { + "epoch": 1.1, + "learning_rate": 0.00015611395646606915, + "loss": 0.7755, + "step": 1373 + }, + { + "epoch": 1.1, + "learning_rate": 0.0001560819462227913, + "loss": 0.7365, + "step": 1374 + }, + { + "epoch": 1.1, + "learning_rate": 0.00015604993597951344, + "loss": 0.7415, + "step": 1375 + }, + { + "epoch": 1.1, + "learning_rate": 0.0001560179257362356, + "loss": 0.7704, + "step": 1376 + }, + { + "epoch": 1.1, + "learning_rate": 0.00015598591549295776, + "loss": 0.6882, + "step": 1377 + }, + { + "epoch": 1.1, + "learning_rate": 0.0001559539052496799, + "loss": 0.7461, + "step": 1378 + }, + { + "epoch": 1.1, + "learning_rate": 0.00015592189500640205, + "loss": 0.8627, + "step": 1379 + }, + { + "epoch": 1.1, + "learning_rate": 0.0001558898847631242, + "loss": 0.8533, + "step": 1380 + }, + { + "epoch": 1.1, + "learning_rate": 0.00015585787451984635, + "loss": 0.81, + "step": 1381 + }, + { + "epoch": 1.11, + "learning_rate": 0.0001558258642765685, + "loss": 0.7401, + "step": 1382 + }, + { + "epoch": 1.11, + "learning_rate": 0.00015579385403329066, + "loss": 0.8605, + "step": 1383 + }, + { + "epoch": 1.11, + "learning_rate": 0.00015576184379001282, + "loss": 0.8051, + "step": 1384 + }, + { + "epoch": 1.11, + "learning_rate": 0.00015572983354673496, + "loss": 0.8358, + "step": 1385 + }, + { + "epoch": 1.11, + "learning_rate": 0.00015569782330345712, + "loss": 0.7555, + "step": 1386 + }, + { + "epoch": 1.11, + "learning_rate": 0.00015566581306017928, + "loss": 0.7716, + "step": 1387 + }, + { + "epoch": 1.11, + "learning_rate": 0.0001556338028169014, + "loss": 0.7215, + "step": 1388 + }, + { + "epoch": 1.11, + "learning_rate": 0.00015560179257362357, + "loss": 0.7867, + "step": 1389 + }, + { + "epoch": 1.11, + "learning_rate": 0.0001555697823303457, + "loss": 0.7081, + "step": 1390 + }, + { + "epoch": 1.11, + "learning_rate": 0.0001555377720870679, + "loss": 0.8732, + "step": 1391 + }, + { + "epoch": 1.11, + "learning_rate": 0.00015550576184379002, + "loss": 0.7357, + "step": 1392 + }, + { + "epoch": 1.11, + "learning_rate": 0.00015547375160051215, + "loss": 0.9173, + "step": 1393 + }, + { + "epoch": 1.12, + "learning_rate": 0.00015544174135723431, + "loss": 0.8296, + "step": 1394 + }, + { + "epoch": 1.12, + "learning_rate": 0.00015540973111395647, + "loss": 0.7953, + "step": 1395 + }, + { + "epoch": 1.12, + "learning_rate": 0.00015537772087067863, + "loss": 0.7846, + "step": 1396 + }, + { + "epoch": 1.12, + "learning_rate": 0.00015534571062740077, + "loss": 0.7175, + "step": 1397 + }, + { + "epoch": 1.12, + "learning_rate": 0.00015531370038412293, + "loss": 0.7458, + "step": 1398 + }, + { + "epoch": 1.12, + "learning_rate": 0.00015528169014084509, + "loss": 0.6837, + "step": 1399 + }, + { + "epoch": 1.12, + "learning_rate": 0.00015524967989756722, + "loss": 0.8058, + "step": 1400 + }, + { + "epoch": 1.12, + "learning_rate": 0.00015521766965428938, + "loss": 0.8511, + "step": 1401 + }, + { + "epoch": 1.12, + "learning_rate": 0.00015518565941101154, + "loss": 1.0781, + "step": 1402 + }, + { + "epoch": 1.12, + "learning_rate": 0.00015515364916773367, + "loss": 0.7767, + "step": 1403 + }, + { + "epoch": 1.12, + "learning_rate": 0.00015512163892445583, + "loss": 0.8578, + "step": 1404 + }, + { + "epoch": 1.12, + "learning_rate": 0.000155089628681178, + "loss": 0.8774, + "step": 1405 + }, + { + "epoch": 1.12, + "learning_rate": 0.00015505761843790015, + "loss": 0.796, + "step": 1406 + }, + { + "epoch": 1.13, + "learning_rate": 0.00015502560819462228, + "loss": 0.8432, + "step": 1407 + }, + { + "epoch": 1.13, + "learning_rate": 0.00015499359795134442, + "loss": 0.7676, + "step": 1408 + }, + { + "epoch": 1.13, + "learning_rate": 0.0001549615877080666, + "loss": 0.7811, + "step": 1409 + }, + { + "epoch": 1.13, + "learning_rate": 0.00015492957746478874, + "loss": 0.821, + "step": 1410 + }, + { + "epoch": 1.13, + "learning_rate": 0.0001548975672215109, + "loss": 0.8126, + "step": 1411 + }, + { + "epoch": 1.13, + "learning_rate": 0.00015486555697823303, + "loss": 0.8039, + "step": 1412 + }, + { + "epoch": 1.13, + "learning_rate": 0.00015483354673495521, + "loss": 0.7433, + "step": 1413 + }, + { + "epoch": 1.13, + "learning_rate": 0.00015480153649167735, + "loss": 0.7994, + "step": 1414 + }, + { + "epoch": 1.13, + "learning_rate": 0.00015476952624839948, + "loss": 0.8593, + "step": 1415 + }, + { + "epoch": 1.13, + "learning_rate": 0.00015473751600512164, + "loss": 0.888, + "step": 1416 + }, + { + "epoch": 1.13, + "learning_rate": 0.0001547055057618438, + "loss": 0.7323, + "step": 1417 + }, + { + "epoch": 1.13, + "learning_rate": 0.00015467349551856596, + "loss": 0.7995, + "step": 1418 + }, + { + "epoch": 1.14, + "learning_rate": 0.0001546414852752881, + "loss": 0.8355, + "step": 1419 + }, + { + "epoch": 1.14, + "learning_rate": 0.00015460947503201025, + "loss": 0.8602, + "step": 1420 + }, + { + "epoch": 1.14, + "learning_rate": 0.0001545774647887324, + "loss": 0.8036, + "step": 1421 + }, + { + "epoch": 1.14, + "learning_rate": 0.00015454545454545454, + "loss": 0.8361, + "step": 1422 + }, + { + "epoch": 1.14, + "learning_rate": 0.0001545134443021767, + "loss": 0.6903, + "step": 1423 + }, + { + "epoch": 1.14, + "learning_rate": 0.00015448143405889886, + "loss": 0.7859, + "step": 1424 + }, + { + "epoch": 1.14, + "learning_rate": 0.000154449423815621, + "loss": 0.678, + "step": 1425 + }, + { + "epoch": 1.14, + "learning_rate": 0.00015441741357234316, + "loss": 0.8705, + "step": 1426 + }, + { + "epoch": 1.14, + "learning_rate": 0.00015438540332906532, + "loss": 0.8014, + "step": 1427 + }, + { + "epoch": 1.14, + "learning_rate": 0.00015435339308578748, + "loss": 0.8278, + "step": 1428 + }, + { + "epoch": 1.14, + "learning_rate": 0.0001543213828425096, + "loss": 0.7633, + "step": 1429 + }, + { + "epoch": 1.14, + "learning_rate": 0.00015428937259923174, + "loss": 0.8391, + "step": 1430 + }, + { + "epoch": 1.14, + "learning_rate": 0.00015425736235595393, + "loss": 0.9004, + "step": 1431 + }, + { + "epoch": 1.15, + "learning_rate": 0.00015422535211267606, + "loss": 0.7682, + "step": 1432 + }, + { + "epoch": 1.15, + "learning_rate": 0.00015419334186939822, + "loss": 0.8185, + "step": 1433 + }, + { + "epoch": 1.15, + "learning_rate": 0.00015416133162612035, + "loss": 0.6972, + "step": 1434 + }, + { + "epoch": 1.15, + "learning_rate": 0.0001541293213828425, + "loss": 0.7351, + "step": 1435 + }, + { + "epoch": 1.15, + "learning_rate": 0.00015409731113956467, + "loss": 0.7794, + "step": 1436 + }, + { + "epoch": 1.15, + "learning_rate": 0.0001540653008962868, + "loss": 0.8494, + "step": 1437 + }, + { + "epoch": 1.15, + "learning_rate": 0.00015403329065300897, + "loss": 0.895, + "step": 1438 + }, + { + "epoch": 1.15, + "learning_rate": 0.00015400128040973113, + "loss": 0.7281, + "step": 1439 + }, + { + "epoch": 1.15, + "learning_rate": 0.00015396927016645328, + "loss": 0.7994, + "step": 1440 + }, + { + "epoch": 1.15, + "learning_rate": 0.00015393725992317542, + "loss": 0.7465, + "step": 1441 + }, + { + "epoch": 1.15, + "learning_rate": 0.00015390524967989758, + "loss": 0.7384, + "step": 1442 + }, + { + "epoch": 1.15, + "learning_rate": 0.00015387323943661974, + "loss": 0.9285, + "step": 1443 + }, + { + "epoch": 1.16, + "learning_rate": 0.00015384122919334187, + "loss": 0.7307, + "step": 1444 + }, + { + "epoch": 1.16, + "learning_rate": 0.00015380921895006403, + "loss": 0.8531, + "step": 1445 + }, + { + "epoch": 1.16, + "learning_rate": 0.0001537772087067862, + "loss": 0.7822, + "step": 1446 + }, + { + "epoch": 1.16, + "learning_rate": 0.00015374519846350832, + "loss": 0.7575, + "step": 1447 + }, + { + "epoch": 1.16, + "learning_rate": 0.00015371318822023048, + "loss": 0.6788, + "step": 1448 + }, + { + "epoch": 1.16, + "learning_rate": 0.00015368117797695264, + "loss": 0.7673, + "step": 1449 + }, + { + "epoch": 1.16, + "learning_rate": 0.0001536491677336748, + "loss": 0.7529, + "step": 1450 + }, + { + "epoch": 1.16, + "learning_rate": 0.00015361715749039693, + "loss": 0.8443, + "step": 1451 + }, + { + "epoch": 1.16, + "learning_rate": 0.00015358514724711907, + "loss": 0.8187, + "step": 1452 + }, + { + "epoch": 1.16, + "learning_rate": 0.00015355313700384125, + "loss": 0.8934, + "step": 1453 + }, + { + "epoch": 1.16, + "learning_rate": 0.00015352112676056339, + "loss": 0.7839, + "step": 1454 + }, + { + "epoch": 1.16, + "learning_rate": 0.00015348911651728555, + "loss": 0.7308, + "step": 1455 + }, + { + "epoch": 1.16, + "learning_rate": 0.00015345710627400768, + "loss": 0.6911, + "step": 1456 + }, + { + "epoch": 1.17, + "learning_rate": 0.00015342509603072984, + "loss": 0.7407, + "step": 1457 + }, + { + "epoch": 1.17, + "learning_rate": 0.000153393085787452, + "loss": 0.8135, + "step": 1458 + }, + { + "epoch": 1.17, + "learning_rate": 0.00015336107554417413, + "loss": 0.7502, + "step": 1459 + }, + { + "epoch": 1.17, + "learning_rate": 0.0001533290653008963, + "loss": 0.7934, + "step": 1460 + }, + { + "epoch": 1.17, + "learning_rate": 0.00015329705505761845, + "loss": 0.8031, + "step": 1461 + }, + { + "epoch": 1.17, + "learning_rate": 0.00015326504481434058, + "loss": 0.8114, + "step": 1462 + }, + { + "epoch": 1.17, + "learning_rate": 0.00015323303457106274, + "loss": 0.7653, + "step": 1463 + }, + { + "epoch": 1.17, + "learning_rate": 0.0001532010243277849, + "loss": 0.8415, + "step": 1464 + }, + { + "epoch": 1.17, + "learning_rate": 0.00015316901408450706, + "loss": 0.8392, + "step": 1465 + }, + { + "epoch": 1.17, + "learning_rate": 0.0001531370038412292, + "loss": 0.8775, + "step": 1466 + }, + { + "epoch": 1.17, + "learning_rate": 0.00015310499359795136, + "loss": 0.7011, + "step": 1467 + }, + { + "epoch": 1.17, + "learning_rate": 0.00015307298335467351, + "loss": 0.8593, + "step": 1468 + }, + { + "epoch": 1.18, + "learning_rate": 0.00015304097311139565, + "loss": 0.864, + "step": 1469 + }, + { + "epoch": 1.18, + "learning_rate": 0.0001530089628681178, + "loss": 0.7902, + "step": 1470 + }, + { + "epoch": 1.18, + "learning_rate": 0.00015297695262483994, + "loss": 0.9209, + "step": 1471 + }, + { + "epoch": 1.18, + "learning_rate": 0.00015294494238156213, + "loss": 0.8251, + "step": 1472 + }, + { + "epoch": 1.18, + "learning_rate": 0.00015291293213828426, + "loss": 0.7643, + "step": 1473 + }, + { + "epoch": 1.18, + "learning_rate": 0.0001528809218950064, + "loss": 0.7794, + "step": 1474 + }, + { + "epoch": 1.18, + "learning_rate": 0.00015284891165172858, + "loss": 0.8476, + "step": 1475 + }, + { + "epoch": 1.18, + "learning_rate": 0.0001528169014084507, + "loss": 0.8143, + "step": 1476 + }, + { + "epoch": 1.18, + "learning_rate": 0.00015278489116517287, + "loss": 0.8373, + "step": 1477 + }, + { + "epoch": 1.18, + "learning_rate": 0.000152752880921895, + "loss": 0.7823, + "step": 1478 + }, + { + "epoch": 1.18, + "learning_rate": 0.00015272087067861716, + "loss": 0.6537, + "step": 1479 + }, + { + "epoch": 1.18, + "learning_rate": 0.00015268886043533932, + "loss": 0.7732, + "step": 1480 + }, + { + "epoch": 1.18, + "learning_rate": 0.00015265685019206146, + "loss": 0.7946, + "step": 1481 + }, + { + "epoch": 1.19, + "learning_rate": 0.00015262483994878362, + "loss": 0.8311, + "step": 1482 + }, + { + "epoch": 1.19, + "learning_rate": 0.00015259282970550578, + "loss": 0.9314, + "step": 1483 + }, + { + "epoch": 1.19, + "learning_rate": 0.0001525608194622279, + "loss": 0.8991, + "step": 1484 + }, + { + "epoch": 1.19, + "learning_rate": 0.00015252880921895007, + "loss": 0.764, + "step": 1485 + }, + { + "epoch": 1.19, + "learning_rate": 0.00015249679897567223, + "loss": 0.7203, + "step": 1486 + }, + { + "epoch": 1.19, + "learning_rate": 0.0001524647887323944, + "loss": 0.8256, + "step": 1487 + }, + { + "epoch": 1.19, + "learning_rate": 0.00015243277848911652, + "loss": 0.6777, + "step": 1488 + }, + { + "epoch": 1.19, + "learning_rate": 0.00015240076824583865, + "loss": 0.8374, + "step": 1489 + }, + { + "epoch": 1.19, + "learning_rate": 0.00015236875800256084, + "loss": 0.7811, + "step": 1490 + }, + { + "epoch": 1.19, + "learning_rate": 0.00015233674775928297, + "loss": 0.795, + "step": 1491 + }, + { + "epoch": 1.19, + "learning_rate": 0.00015230473751600513, + "loss": 0.7515, + "step": 1492 + }, + { + "epoch": 1.19, + "learning_rate": 0.00015227272727272727, + "loss": 0.8313, + "step": 1493 + }, + { + "epoch": 1.2, + "learning_rate": 0.00015224071702944943, + "loss": 0.8, + "step": 1494 + }, + { + "epoch": 1.2, + "learning_rate": 0.00015220870678617159, + "loss": 0.6592, + "step": 1495 + }, + { + "epoch": 1.2, + "learning_rate": 0.00015217669654289372, + "loss": 0.8699, + "step": 1496 + }, + { + "epoch": 1.2, + "learning_rate": 0.00015214468629961588, + "loss": 0.9075, + "step": 1497 + }, + { + "epoch": 1.2, + "learning_rate": 0.00015211267605633804, + "loss": 0.7307, + "step": 1498 + }, + { + "epoch": 1.2, + "learning_rate": 0.0001520806658130602, + "loss": 0.7924, + "step": 1499 + }, + { + "epoch": 1.2, + "learning_rate": 0.00015204865556978233, + "loss": 0.6903, + "step": 1500 + }, + { + "epoch": 1.2, + "learning_rate": 0.0001520166453265045, + "loss": 0.767, + "step": 1501 + }, + { + "epoch": 1.2, + "learning_rate": 0.00015198463508322665, + "loss": 0.9658, + "step": 1502 + }, + { + "epoch": 1.2, + "learning_rate": 0.00015195262483994878, + "loss": 0.8506, + "step": 1503 + }, + { + "epoch": 1.2, + "learning_rate": 0.00015192061459667094, + "loss": 0.7243, + "step": 1504 + }, + { + "epoch": 1.2, + "learning_rate": 0.0001518886043533931, + "loss": 0.7994, + "step": 1505 + }, + { + "epoch": 1.2, + "learning_rate": 0.00015185659411011523, + "loss": 0.8432, + "step": 1506 + }, + { + "epoch": 1.21, + "learning_rate": 0.0001518245838668374, + "loss": 0.7705, + "step": 1507 + }, + { + "epoch": 1.21, + "learning_rate": 0.00015179257362355955, + "loss": 0.8179, + "step": 1508 + }, + { + "epoch": 1.21, + "learning_rate": 0.00015176056338028171, + "loss": 0.8934, + "step": 1509 + }, + { + "epoch": 1.21, + "learning_rate": 0.00015172855313700385, + "loss": 0.8337, + "step": 1510 + }, + { + "epoch": 1.21, + "learning_rate": 0.00015169654289372598, + "loss": 0.7848, + "step": 1511 + }, + { + "epoch": 1.21, + "learning_rate": 0.00015166453265044817, + "loss": 0.7159, + "step": 1512 + }, + { + "epoch": 1.21, + "learning_rate": 0.0001516325224071703, + "loss": 0.8245, + "step": 1513 + }, + { + "epoch": 1.21, + "learning_rate": 0.00015160051216389246, + "loss": 0.7876, + "step": 1514 + }, + { + "epoch": 1.21, + "learning_rate": 0.0001515685019206146, + "loss": 0.7292, + "step": 1515 + }, + { + "epoch": 1.21, + "learning_rate": 0.00015153649167733675, + "loss": 0.88, + "step": 1516 + }, + { + "epoch": 1.21, + "learning_rate": 0.0001515044814340589, + "loss": 0.7606, + "step": 1517 + }, + { + "epoch": 1.21, + "learning_rate": 0.00015147247119078104, + "loss": 0.8915, + "step": 1518 + }, + { + "epoch": 1.22, + "learning_rate": 0.0001514404609475032, + "loss": 0.7974, + "step": 1519 + }, + { + "epoch": 1.22, + "learning_rate": 0.00015140845070422536, + "loss": 0.8591, + "step": 1520 + }, + { + "epoch": 1.22, + "learning_rate": 0.0001513764404609475, + "loss": 0.76, + "step": 1521 + }, + { + "epoch": 1.22, + "learning_rate": 0.00015134443021766966, + "loss": 0.6582, + "step": 1522 + }, + { + "epoch": 1.22, + "learning_rate": 0.00015131241997439182, + "loss": 0.7636, + "step": 1523 + }, + { + "epoch": 1.22, + "learning_rate": 0.00015128040973111397, + "loss": 0.7579, + "step": 1524 + }, + { + "epoch": 1.22, + "learning_rate": 0.0001512483994878361, + "loss": 0.7617, + "step": 1525 + }, + { + "epoch": 1.22, + "learning_rate": 0.00015121638924455827, + "loss": 0.8255, + "step": 1526 + }, + { + "epoch": 1.22, + "learning_rate": 0.00015118437900128043, + "loss": 0.9101, + "step": 1527 + }, + { + "epoch": 1.22, + "learning_rate": 0.00015115236875800256, + "loss": 0.8265, + "step": 1528 + }, + { + "epoch": 1.22, + "learning_rate": 0.00015112035851472472, + "loss": 0.8122, + "step": 1529 + }, + { + "epoch": 1.22, + "learning_rate": 0.00015108834827144688, + "loss": 0.785, + "step": 1530 + }, + { + "epoch": 1.22, + "learning_rate": 0.00015105633802816904, + "loss": 0.881, + "step": 1531 + }, + { + "epoch": 1.23, + "learning_rate": 0.00015102432778489117, + "loss": 0.8246, + "step": 1532 + }, + { + "epoch": 1.23, + "learning_rate": 0.0001509923175416133, + "loss": 0.8312, + "step": 1533 + }, + { + "epoch": 1.23, + "learning_rate": 0.0001509603072983355, + "loss": 0.8054, + "step": 1534 + }, + { + "epoch": 1.23, + "learning_rate": 0.00015092829705505762, + "loss": 0.8018, + "step": 1535 + }, + { + "epoch": 1.23, + "learning_rate": 0.00015089628681177978, + "loss": 0.8094, + "step": 1536 + }, + { + "epoch": 1.23, + "learning_rate": 0.00015086427656850192, + "loss": 0.7646, + "step": 1537 + }, + { + "epoch": 1.23, + "learning_rate": 0.00015083226632522408, + "loss": 0.7819, + "step": 1538 + }, + { + "epoch": 1.23, + "learning_rate": 0.00015080025608194624, + "loss": 0.8075, + "step": 1539 + }, + { + "epoch": 1.23, + "learning_rate": 0.00015076824583866837, + "loss": 0.6932, + "step": 1540 + }, + { + "epoch": 1.23, + "learning_rate": 0.00015073623559539053, + "loss": 0.7649, + "step": 1541 + }, + { + "epoch": 1.23, + "learning_rate": 0.0001507042253521127, + "loss": 0.7814, + "step": 1542 + }, + { + "epoch": 1.23, + "learning_rate": 0.00015067221510883482, + "loss": 0.7637, + "step": 1543 + }, + { + "epoch": 1.24, + "learning_rate": 0.00015064020486555698, + "loss": 0.9169, + "step": 1544 + }, + { + "epoch": 1.24, + "learning_rate": 0.00015060819462227914, + "loss": 0.8295, + "step": 1545 + }, + { + "epoch": 1.24, + "learning_rate": 0.0001505761843790013, + "loss": 0.8248, + "step": 1546 + }, + { + "epoch": 1.24, + "learning_rate": 0.00015054417413572343, + "loss": 0.748, + "step": 1547 + }, + { + "epoch": 1.24, + "learning_rate": 0.00015051216389244557, + "loss": 0.7917, + "step": 1548 + }, + { + "epoch": 1.24, + "learning_rate": 0.00015048015364916775, + "loss": 0.803, + "step": 1549 + }, + { + "epoch": 1.24, + "learning_rate": 0.00015044814340588989, + "loss": 0.7679, + "step": 1550 + }, + { + "epoch": 1.24, + "learning_rate": 0.00015041613316261205, + "loss": 0.7322, + "step": 1551 + }, + { + "epoch": 1.24, + "learning_rate": 0.0001503841229193342, + "loss": 0.8185, + "step": 1552 + }, + { + "epoch": 1.24, + "learning_rate": 0.00015035211267605636, + "loss": 0.8821, + "step": 1553 + }, + { + "epoch": 1.24, + "learning_rate": 0.0001503201024327785, + "loss": 0.7855, + "step": 1554 + }, + { + "epoch": 1.24, + "learning_rate": 0.00015028809218950063, + "loss": 0.7525, + "step": 1555 + }, + { + "epoch": 1.24, + "learning_rate": 0.00015025608194622282, + "loss": 0.7629, + "step": 1556 + }, + { + "epoch": 1.25, + "learning_rate": 0.00015022407170294495, + "loss": 0.7666, + "step": 1557 + }, + { + "epoch": 1.25, + "learning_rate": 0.0001501920614596671, + "loss": 0.706, + "step": 1558 + }, + { + "epoch": 1.25, + "learning_rate": 0.00015016005121638924, + "loss": 0.7744, + "step": 1559 + }, + { + "epoch": 1.25, + "learning_rate": 0.0001501280409731114, + "loss": 0.8378, + "step": 1560 + }, + { + "epoch": 1.25, + "learning_rate": 0.00015009603072983356, + "loss": 0.884, + "step": 1561 + }, + { + "epoch": 1.25, + "learning_rate": 0.0001500640204865557, + "loss": 0.9005, + "step": 1562 + }, + { + "epoch": 1.25, + "learning_rate": 0.00015003201024327785, + "loss": 0.7622, + "step": 1563 + }, + { + "epoch": 1.25, + "learning_rate": 0.00015000000000000001, + "loss": 0.7539, + "step": 1564 + }, + { + "epoch": 1.25, + "learning_rate": 0.00014996798975672215, + "loss": 0.8841, + "step": 1565 + }, + { + "epoch": 1.25, + "learning_rate": 0.0001499359795134443, + "loss": 0.7795, + "step": 1566 + }, + { + "epoch": 1.25, + "learning_rate": 0.00014990396927016647, + "loss": 0.7759, + "step": 1567 + }, + { + "epoch": 1.25, + "learning_rate": 0.00014987195902688863, + "loss": 0.8808, + "step": 1568 + }, + { + "epoch": 1.26, + "learning_rate": 0.00014983994878361076, + "loss": 0.8022, + "step": 1569 + }, + { + "epoch": 1.26, + "learning_rate": 0.0001498079385403329, + "loss": 0.8546, + "step": 1570 + }, + { + "epoch": 1.26, + "learning_rate": 0.00014977592829705508, + "loss": 0.8223, + "step": 1571 + }, + { + "epoch": 1.26, + "learning_rate": 0.0001497439180537772, + "loss": 0.8803, + "step": 1572 + }, + { + "epoch": 1.26, + "learning_rate": 0.00014971190781049937, + "loss": 0.836, + "step": 1573 + }, + { + "epoch": 1.26, + "learning_rate": 0.0001496798975672215, + "loss": 0.8962, + "step": 1574 + }, + { + "epoch": 1.26, + "learning_rate": 0.00014964788732394366, + "loss": 0.8101, + "step": 1575 + }, + { + "epoch": 1.26, + "learning_rate": 0.00014961587708066582, + "loss": 0.8347, + "step": 1576 + }, + { + "epoch": 1.26, + "learning_rate": 0.00014958386683738796, + "loss": 0.9249, + "step": 1577 + }, + { + "epoch": 1.26, + "learning_rate": 0.00014955185659411014, + "loss": 0.746, + "step": 1578 + }, + { + "epoch": 1.26, + "learning_rate": 0.00014951984635083228, + "loss": 0.7876, + "step": 1579 + }, + { + "epoch": 1.26, + "learning_rate": 0.00014948783610755443, + "loss": 0.8489, + "step": 1580 + }, + { + "epoch": 1.26, + "learning_rate": 0.00014945582586427657, + "loss": 0.7888, + "step": 1581 + }, + { + "epoch": 1.27, + "learning_rate": 0.00014942381562099873, + "loss": 0.7828, + "step": 1582 + }, + { + "epoch": 1.27, + "learning_rate": 0.0001493918053777209, + "loss": 0.6971, + "step": 1583 + }, + { + "epoch": 1.27, + "learning_rate": 0.00014935979513444302, + "loss": 0.8581, + "step": 1584 + }, + { + "epoch": 1.27, + "learning_rate": 0.00014932778489116518, + "loss": 0.7731, + "step": 1585 + }, + { + "epoch": 1.27, + "learning_rate": 0.00014929577464788734, + "loss": 0.8862, + "step": 1586 + }, + { + "epoch": 1.27, + "learning_rate": 0.00014926376440460947, + "loss": 0.7576, + "step": 1587 + }, + { + "epoch": 1.27, + "learning_rate": 0.00014923175416133163, + "loss": 0.8686, + "step": 1588 + }, + { + "epoch": 1.27, + "learning_rate": 0.0001491997439180538, + "loss": 0.8595, + "step": 1589 + }, + { + "epoch": 1.27, + "learning_rate": 0.00014916773367477595, + "loss": 0.8491, + "step": 1590 + }, + { + "epoch": 1.27, + "learning_rate": 0.00014913572343149808, + "loss": 0.6411, + "step": 1591 + }, + { + "epoch": 1.27, + "learning_rate": 0.00014910371318822022, + "loss": 0.9346, + "step": 1592 + }, + { + "epoch": 1.27, + "learning_rate": 0.0001490717029449424, + "loss": 0.7693, + "step": 1593 + }, + { + "epoch": 1.28, + "learning_rate": 0.00014903969270166454, + "loss": 0.7544, + "step": 1594 + }, + { + "epoch": 1.28, + "learning_rate": 0.0001490076824583867, + "loss": 0.6368, + "step": 1595 + }, + { + "epoch": 1.28, + "learning_rate": 0.00014897567221510883, + "loss": 0.7607, + "step": 1596 + }, + { + "epoch": 1.28, + "learning_rate": 0.000148943661971831, + "loss": 0.7344, + "step": 1597 + }, + { + "epoch": 1.28, + "learning_rate": 0.00014891165172855315, + "loss": 0.8042, + "step": 1598 + }, + { + "epoch": 1.28, + "learning_rate": 0.00014887964148527528, + "loss": 0.8853, + "step": 1599 + }, + { + "epoch": 1.28, + "learning_rate": 0.00014884763124199744, + "loss": 0.7796, + "step": 1600 + }, + { + "epoch": 1.28, + "learning_rate": 0.0001488156209987196, + "loss": 0.8594, + "step": 1601 + }, + { + "epoch": 1.28, + "learning_rate": 0.00014878361075544173, + "loss": 0.8939, + "step": 1602 + }, + { + "epoch": 1.28, + "learning_rate": 0.0001487516005121639, + "loss": 0.744, + "step": 1603 + }, + { + "epoch": 1.28, + "learning_rate": 0.00014871959026888605, + "loss": 0.8537, + "step": 1604 + }, + { + "epoch": 1.28, + "learning_rate": 0.0001486875800256082, + "loss": 0.7636, + "step": 1605 + }, + { + "epoch": 1.28, + "learning_rate": 0.00014865556978233035, + "loss": 0.8777, + "step": 1606 + }, + { + "epoch": 1.29, + "learning_rate": 0.0001486235595390525, + "loss": 0.7943, + "step": 1607 + }, + { + "epoch": 1.29, + "learning_rate": 0.00014859154929577467, + "loss": 0.7552, + "step": 1608 + }, + { + "epoch": 1.29, + "learning_rate": 0.0001485595390524968, + "loss": 0.8262, + "step": 1609 + }, + { + "epoch": 1.29, + "learning_rate": 0.00014852752880921896, + "loss": 0.7661, + "step": 1610 + }, + { + "epoch": 1.29, + "learning_rate": 0.00014849551856594112, + "loss": 0.7442, + "step": 1611 + }, + { + "epoch": 1.29, + "learning_rate": 0.00014846350832266328, + "loss": 0.9476, + "step": 1612 + }, + { + "epoch": 1.29, + "learning_rate": 0.0001484314980793854, + "loss": 0.7575, + "step": 1613 + }, + { + "epoch": 1.29, + "learning_rate": 0.00014839948783610754, + "loss": 0.8325, + "step": 1614 + }, + { + "epoch": 1.29, + "learning_rate": 0.00014836747759282973, + "loss": 0.7925, + "step": 1615 + }, + { + "epoch": 1.29, + "learning_rate": 0.00014833546734955186, + "loss": 0.8862, + "step": 1616 + }, + { + "epoch": 1.29, + "learning_rate": 0.00014830345710627402, + "loss": 0.7349, + "step": 1617 + }, + { + "epoch": 1.29, + "learning_rate": 0.00014827144686299615, + "loss": 0.736, + "step": 1618 + }, + { + "epoch": 1.3, + "learning_rate": 0.00014823943661971831, + "loss": 0.8037, + "step": 1619 + }, + { + "epoch": 1.3, + "learning_rate": 0.00014820742637644047, + "loss": 0.8439, + "step": 1620 + }, + { + "epoch": 1.3, + "learning_rate": 0.0001481754161331626, + "loss": 0.7942, + "step": 1621 + }, + { + "epoch": 1.3, + "learning_rate": 0.00014814340588988477, + "loss": 0.8167, + "step": 1622 + }, + { + "epoch": 1.3, + "learning_rate": 0.00014811139564660693, + "loss": 0.8849, + "step": 1623 + }, + { + "epoch": 1.3, + "learning_rate": 0.00014807938540332906, + "loss": 0.8071, + "step": 1624 + }, + { + "epoch": 1.3, + "learning_rate": 0.00014804737516005122, + "loss": 0.8058, + "step": 1625 + }, + { + "epoch": 1.3, + "learning_rate": 0.00014801536491677338, + "loss": 0.7679, + "step": 1626 + }, + { + "epoch": 1.3, + "learning_rate": 0.00014798335467349554, + "loss": 0.8025, + "step": 1627 + }, + { + "epoch": 1.3, + "learning_rate": 0.00014795134443021767, + "loss": 0.7375, + "step": 1628 + }, + { + "epoch": 1.3, + "learning_rate": 0.00014791933418693983, + "loss": 0.9299, + "step": 1629 + }, + { + "epoch": 1.3, + "learning_rate": 0.000147887323943662, + "loss": 0.7056, + "step": 1630 + }, + { + "epoch": 1.3, + "learning_rate": 0.00014785531370038412, + "loss": 0.8508, + "step": 1631 + }, + { + "epoch": 1.31, + "learning_rate": 0.00014782330345710628, + "loss": 0.7971, + "step": 1632 + }, + { + "epoch": 1.31, + "learning_rate": 0.00014779129321382844, + "loss": 0.7834, + "step": 1633 + }, + { + "epoch": 1.31, + "learning_rate": 0.0001477592829705506, + "loss": 0.7162, + "step": 1634 + }, + { + "epoch": 1.31, + "learning_rate": 0.00014772727272727274, + "loss": 0.8369, + "step": 1635 + }, + { + "epoch": 1.31, + "learning_rate": 0.00014769526248399487, + "loss": 0.8172, + "step": 1636 + }, + { + "epoch": 1.31, + "learning_rate": 0.00014766325224071705, + "loss": 0.804, + "step": 1637 + }, + { + "epoch": 1.31, + "learning_rate": 0.0001476312419974392, + "loss": 0.8805, + "step": 1638 + }, + { + "epoch": 1.31, + "learning_rate": 0.00014759923175416135, + "loss": 0.8724, + "step": 1639 + }, + { + "epoch": 1.31, + "learning_rate": 0.00014756722151088348, + "loss": 0.9489, + "step": 1640 + }, + { + "epoch": 1.31, + "learning_rate": 0.00014753521126760564, + "loss": 0.8291, + "step": 1641 + }, + { + "epoch": 1.31, + "learning_rate": 0.0001475032010243278, + "loss": 0.799, + "step": 1642 + }, + { + "epoch": 1.31, + "learning_rate": 0.00014747119078104993, + "loss": 0.7886, + "step": 1643 + }, + { + "epoch": 1.32, + "learning_rate": 0.0001474391805377721, + "loss": 0.7437, + "step": 1644 + }, + { + "epoch": 1.32, + "learning_rate": 0.00014740717029449425, + "loss": 0.7589, + "step": 1645 + }, + { + "epoch": 1.32, + "learning_rate": 0.00014737516005121638, + "loss": 0.7261, + "step": 1646 + }, + { + "epoch": 1.32, + "learning_rate": 0.00014734314980793854, + "loss": 0.7568, + "step": 1647 + }, + { + "epoch": 1.32, + "learning_rate": 0.0001473111395646607, + "loss": 0.7779, + "step": 1648 + }, + { + "epoch": 1.32, + "learning_rate": 0.00014727912932138286, + "loss": 0.8424, + "step": 1649 + }, + { + "epoch": 1.32, + "learning_rate": 0.000147247119078105, + "loss": 0.9575, + "step": 1650 + }, + { + "epoch": 1.32, + "learning_rate": 0.00014721510883482713, + "loss": 0.8215, + "step": 1651 + }, + { + "epoch": 1.32, + "learning_rate": 0.00014718309859154932, + "loss": 0.8809, + "step": 1652 + }, + { + "epoch": 1.32, + "learning_rate": 0.00014715108834827145, + "loss": 0.9328, + "step": 1653 + }, + { + "epoch": 1.32, + "learning_rate": 0.0001471190781049936, + "loss": 0.824, + "step": 1654 + }, + { + "epoch": 1.32, + "learning_rate": 0.00014708706786171577, + "loss": 0.8128, + "step": 1655 + }, + { + "epoch": 1.32, + "learning_rate": 0.0001470550576184379, + "loss": 0.8321, + "step": 1656 + }, + { + "epoch": 1.33, + "learning_rate": 0.00014702304737516006, + "loss": 0.7925, + "step": 1657 + }, + { + "epoch": 1.33, + "learning_rate": 0.0001469910371318822, + "loss": 0.7691, + "step": 1658 + }, + { + "epoch": 1.33, + "learning_rate": 0.00014695902688860438, + "loss": 0.7419, + "step": 1659 + }, + { + "epoch": 1.33, + "learning_rate": 0.0001469270166453265, + "loss": 0.7686, + "step": 1660 + }, + { + "epoch": 1.33, + "learning_rate": 0.00014689500640204865, + "loss": 0.715, + "step": 1661 + }, + { + "epoch": 1.33, + "learning_rate": 0.0001468629961587708, + "loss": 0.7874, + "step": 1662 + }, + { + "epoch": 1.33, + "learning_rate": 0.00014683098591549297, + "loss": 0.8192, + "step": 1663 + }, + { + "epoch": 1.33, + "learning_rate": 0.00014679897567221513, + "loss": 0.6889, + "step": 1664 + }, + { + "epoch": 1.33, + "learning_rate": 0.00014676696542893726, + "loss": 0.8251, + "step": 1665 + }, + { + "epoch": 1.33, + "learning_rate": 0.00014673495518565942, + "loss": 0.7909, + "step": 1666 + }, + { + "epoch": 1.33, + "learning_rate": 0.00014670294494238158, + "loss": 0.7737, + "step": 1667 + }, + { + "epoch": 1.33, + "learning_rate": 0.0001466709346991037, + "loss": 0.8291, + "step": 1668 + }, + { + "epoch": 1.34, + "learning_rate": 0.00014663892445582587, + "loss": 0.8179, + "step": 1669 + }, + { + "epoch": 1.34, + "learning_rate": 0.00014660691421254803, + "loss": 0.7561, + "step": 1670 + }, + { + "epoch": 1.34, + "learning_rate": 0.0001465749039692702, + "loss": 0.8656, + "step": 1671 + }, + { + "epoch": 1.34, + "learning_rate": 0.00014654289372599232, + "loss": 0.8404, + "step": 1672 + }, + { + "epoch": 1.34, + "learning_rate": 0.00014651088348271445, + "loss": 0.796, + "step": 1673 + }, + { + "epoch": 1.34, + "learning_rate": 0.00014647887323943664, + "loss": 0.7077, + "step": 1674 + }, + { + "epoch": 1.34, + "learning_rate": 0.00014644686299615877, + "loss": 0.874, + "step": 1675 + }, + { + "epoch": 1.34, + "learning_rate": 0.00014641485275288093, + "loss": 0.718, + "step": 1676 + }, + { + "epoch": 1.34, + "learning_rate": 0.00014638284250960307, + "loss": 0.7866, + "step": 1677 + }, + { + "epoch": 1.34, + "learning_rate": 0.00014635083226632523, + "loss": 0.8582, + "step": 1678 + }, + { + "epoch": 1.34, + "learning_rate": 0.00014631882202304739, + "loss": 0.7491, + "step": 1679 + }, + { + "epoch": 1.34, + "learning_rate": 0.00014628681177976952, + "loss": 0.7662, + "step": 1680 + }, + { + "epoch": 1.34, + "learning_rate": 0.0001462548015364917, + "loss": 0.8673, + "step": 1681 + }, + { + "epoch": 1.35, + "learning_rate": 0.00014622279129321384, + "loss": 0.8003, + "step": 1682 + }, + { + "epoch": 1.35, + "learning_rate": 0.00014619078104993597, + "loss": 0.7525, + "step": 1683 + }, + { + "epoch": 1.35, + "learning_rate": 0.00014615877080665813, + "loss": 0.8586, + "step": 1684 + }, + { + "epoch": 1.35, + "learning_rate": 0.0001461267605633803, + "loss": 0.7801, + "step": 1685 + }, + { + "epoch": 1.35, + "learning_rate": 0.00014609475032010245, + "loss": 0.8113, + "step": 1686 + }, + { + "epoch": 1.35, + "learning_rate": 0.00014606274007682458, + "loss": 0.8013, + "step": 1687 + }, + { + "epoch": 1.35, + "learning_rate": 0.00014603072983354674, + "loss": 0.8769, + "step": 1688 + }, + { + "epoch": 1.35, + "learning_rate": 0.0001459987195902689, + "loss": 0.7501, + "step": 1689 + }, + { + "epoch": 1.35, + "learning_rate": 0.00014596670934699104, + "loss": 0.7747, + "step": 1690 + }, + { + "epoch": 1.35, + "learning_rate": 0.0001459346991037132, + "loss": 0.7341, + "step": 1691 + }, + { + "epoch": 1.35, + "learning_rate": 0.00014590268886043536, + "loss": 0.7857, + "step": 1692 + }, + { + "epoch": 1.35, + "learning_rate": 0.00014587067861715751, + "loss": 0.9432, + "step": 1693 + }, + { + "epoch": 1.36, + "learning_rate": 0.00014583866837387965, + "loss": 0.7982, + "step": 1694 + }, + { + "epoch": 1.36, + "learning_rate": 0.00014580665813060178, + "loss": 0.8055, + "step": 1695 + }, + { + "epoch": 1.36, + "learning_rate": 0.00014577464788732397, + "loss": 0.8128, + "step": 1696 + }, + { + "epoch": 1.36, + "learning_rate": 0.0001457426376440461, + "loss": 0.7193, + "step": 1697 + }, + { + "epoch": 1.36, + "learning_rate": 0.00014571062740076826, + "loss": 0.7919, + "step": 1698 + }, + { + "epoch": 1.36, + "learning_rate": 0.0001456786171574904, + "loss": 0.7772, + "step": 1699 + }, + { + "epoch": 1.36, + "learning_rate": 0.00014564660691421255, + "loss": 0.8278, + "step": 1700 + }, + { + "epoch": 1.36, + "learning_rate": 0.0001456145966709347, + "loss": 0.7938, + "step": 1701 + }, + { + "epoch": 1.36, + "learning_rate": 0.00014558258642765684, + "loss": 0.8413, + "step": 1702 + }, + { + "epoch": 1.36, + "learning_rate": 0.000145550576184379, + "loss": 0.8201, + "step": 1703 + }, + { + "epoch": 1.36, + "learning_rate": 0.00014551856594110116, + "loss": 0.8155, + "step": 1704 + }, + { + "epoch": 1.36, + "learning_rate": 0.0001454865556978233, + "loss": 0.7686, + "step": 1705 + }, + { + "epoch": 1.36, + "learning_rate": 0.00014545454545454546, + "loss": 0.7208, + "step": 1706 + }, + { + "epoch": 1.37, + "learning_rate": 0.00014542253521126762, + "loss": 0.8199, + "step": 1707 + }, + { + "epoch": 1.37, + "learning_rate": 0.00014539052496798978, + "loss": 0.7633, + "step": 1708 + }, + { + "epoch": 1.37, + "learning_rate": 0.0001453585147247119, + "loss": 0.8063, + "step": 1709 + }, + { + "epoch": 1.37, + "learning_rate": 0.00014532650448143407, + "loss": 0.8905, + "step": 1710 + }, + { + "epoch": 1.37, + "learning_rate": 0.00014529449423815623, + "loss": 0.8149, + "step": 1711 + }, + { + "epoch": 1.37, + "learning_rate": 0.00014526248399487836, + "loss": 0.7763, + "step": 1712 + }, + { + "epoch": 1.37, + "learning_rate": 0.00014523047375160052, + "loss": 0.8105, + "step": 1713 + }, + { + "epoch": 1.37, + "learning_rate": 0.00014519846350832268, + "loss": 0.9777, + "step": 1714 + }, + { + "epoch": 1.37, + "learning_rate": 0.0001451664532650448, + "loss": 0.9211, + "step": 1715 + }, + { + "epoch": 1.37, + "learning_rate": 0.00014513444302176697, + "loss": 0.8483, + "step": 1716 + }, + { + "epoch": 1.37, + "learning_rate": 0.0001451024327784891, + "loss": 0.7226, + "step": 1717 + }, + { + "epoch": 1.37, + "learning_rate": 0.0001450704225352113, + "loss": 0.8005, + "step": 1718 + }, + { + "epoch": 1.38, + "learning_rate": 0.00014503841229193343, + "loss": 0.7437, + "step": 1719 + }, + { + "epoch": 1.38, + "learning_rate": 0.00014500640204865559, + "loss": 0.8249, + "step": 1720 + }, + { + "epoch": 1.38, + "learning_rate": 0.00014497439180537772, + "loss": 0.6258, + "step": 1721 + }, + { + "epoch": 1.38, + "learning_rate": 0.00014494238156209988, + "loss": 0.7421, + "step": 1722 + }, + { + "epoch": 1.38, + "learning_rate": 0.00014491037131882204, + "loss": 0.7896, + "step": 1723 + }, + { + "epoch": 1.38, + "learning_rate": 0.00014487836107554417, + "loss": 0.6566, + "step": 1724 + }, + { + "epoch": 1.38, + "learning_rate": 0.00014484635083226633, + "loss": 0.7335, + "step": 1725 + }, + { + "epoch": 1.38, + "learning_rate": 0.0001448143405889885, + "loss": 0.735, + "step": 1726 + }, + { + "epoch": 1.38, + "learning_rate": 0.00014478233034571062, + "loss": 0.9428, + "step": 1727 + }, + { + "epoch": 1.38, + "learning_rate": 0.00014475032010243278, + "loss": 0.7685, + "step": 1728 + }, + { + "epoch": 1.38, + "learning_rate": 0.00014471830985915494, + "loss": 0.8517, + "step": 1729 + }, + { + "epoch": 1.38, + "learning_rate": 0.0001446862996158771, + "loss": 0.6876, + "step": 1730 + }, + { + "epoch": 1.38, + "learning_rate": 0.00014465428937259923, + "loss": 0.8987, + "step": 1731 + }, + { + "epoch": 1.39, + "learning_rate": 0.0001446222791293214, + "loss": 0.7934, + "step": 1732 + }, + { + "epoch": 1.39, + "learning_rate": 0.00014459026888604355, + "loss": 0.8222, + "step": 1733 + }, + { + "epoch": 1.39, + "learning_rate": 0.0001445582586427657, + "loss": 0.8085, + "step": 1734 + }, + { + "epoch": 1.39, + "learning_rate": 0.00014452624839948785, + "loss": 0.8252, + "step": 1735 + }, + { + "epoch": 1.39, + "learning_rate": 0.00014449423815621, + "loss": 0.7478, + "step": 1736 + }, + { + "epoch": 1.39, + "learning_rate": 0.00014446222791293214, + "loss": 0.7776, + "step": 1737 + }, + { + "epoch": 1.39, + "learning_rate": 0.0001444302176696543, + "loss": 0.8924, + "step": 1738 + }, + { + "epoch": 1.39, + "learning_rate": 0.00014439820742637643, + "loss": 0.912, + "step": 1739 + }, + { + "epoch": 1.39, + "learning_rate": 0.00014436619718309862, + "loss": 0.7564, + "step": 1740 + }, + { + "epoch": 1.39, + "learning_rate": 0.00014433418693982075, + "loss": 0.7776, + "step": 1741 + }, + { + "epoch": 1.39, + "learning_rate": 0.00014430217669654288, + "loss": 0.7541, + "step": 1742 + }, + { + "epoch": 1.39, + "learning_rate": 0.00014427016645326504, + "loss": 0.8976, + "step": 1743 + }, + { + "epoch": 1.4, + "learning_rate": 0.0001442381562099872, + "loss": 0.8021, + "step": 1744 + }, + { + "epoch": 1.4, + "learning_rate": 0.00014420614596670936, + "loss": 0.7639, + "step": 1745 + }, + { + "epoch": 1.4, + "learning_rate": 0.0001441741357234315, + "loss": 0.7838, + "step": 1746 + }, + { + "epoch": 1.4, + "learning_rate": 0.00014414212548015366, + "loss": 0.9058, + "step": 1747 + }, + { + "epoch": 1.4, + "learning_rate": 0.00014411011523687582, + "loss": 0.8116, + "step": 1748 + }, + { + "epoch": 1.4, + "learning_rate": 0.00014407810499359795, + "loss": 0.8207, + "step": 1749 + }, + { + "epoch": 1.4, + "learning_rate": 0.0001440460947503201, + "loss": 0.869, + "step": 1750 + }, + { + "epoch": 1.4, + "learning_rate": 0.00014401408450704227, + "loss": 0.7934, + "step": 1751 + }, + { + "epoch": 1.4, + "learning_rate": 0.00014398207426376443, + "loss": 0.741, + "step": 1752 + }, + { + "epoch": 1.4, + "learning_rate": 0.00014395006402048656, + "loss": 0.6828, + "step": 1753 + }, + { + "epoch": 1.4, + "learning_rate": 0.0001439180537772087, + "loss": 0.7805, + "step": 1754 + }, + { + "epoch": 1.4, + "learning_rate": 0.00014388604353393088, + "loss": 1.0021, + "step": 1755 + }, + { + "epoch": 1.4, + "learning_rate": 0.000143854033290653, + "loss": 0.7014, + "step": 1756 + }, + { + "epoch": 1.41, + "learning_rate": 0.00014382202304737517, + "loss": 0.8346, + "step": 1757 + }, + { + "epoch": 1.41, + "learning_rate": 0.00014379001280409733, + "loss": 0.8753, + "step": 1758 + }, + { + "epoch": 1.41, + "learning_rate": 0.00014375800256081946, + "loss": 0.742, + "step": 1759 + }, + { + "epoch": 1.41, + "learning_rate": 0.00014372599231754162, + "loss": 0.7709, + "step": 1760 + }, + { + "epoch": 1.41, + "learning_rate": 0.00014369398207426376, + "loss": 0.6313, + "step": 1761 + }, + { + "epoch": 1.41, + "learning_rate": 0.00014366197183098594, + "loss": 0.7081, + "step": 1762 + }, + { + "epoch": 1.41, + "learning_rate": 0.00014362996158770808, + "loss": 0.8308, + "step": 1763 + }, + { + "epoch": 1.41, + "learning_rate": 0.0001435979513444302, + "loss": 0.781, + "step": 1764 + }, + { + "epoch": 1.41, + "learning_rate": 0.00014356594110115237, + "loss": 0.8683, + "step": 1765 + }, + { + "epoch": 1.41, + "learning_rate": 0.00014353393085787453, + "loss": 0.7069, + "step": 1766 + }, + { + "epoch": 1.41, + "learning_rate": 0.0001435019206145967, + "loss": 0.7619, + "step": 1767 + }, + { + "epoch": 1.41, + "learning_rate": 0.00014346991037131882, + "loss": 0.7792, + "step": 1768 + }, + { + "epoch": 1.42, + "learning_rate": 0.00014343790012804098, + "loss": 0.8741, + "step": 1769 + }, + { + "epoch": 1.42, + "learning_rate": 0.00014340588988476314, + "loss": 0.6966, + "step": 1770 + }, + { + "epoch": 1.42, + "learning_rate": 0.00014337387964148527, + "loss": 0.8611, + "step": 1771 + }, + { + "epoch": 1.42, + "learning_rate": 0.00014334186939820743, + "loss": 0.8527, + "step": 1772 + }, + { + "epoch": 1.42, + "learning_rate": 0.0001433098591549296, + "loss": 0.9097, + "step": 1773 + }, + { + "epoch": 1.42, + "learning_rate": 0.00014327784891165175, + "loss": 0.7858, + "step": 1774 + }, + { + "epoch": 1.42, + "learning_rate": 0.00014324583866837389, + "loss": 1.0002, + "step": 1775 + }, + { + "epoch": 1.42, + "learning_rate": 0.00014321382842509602, + "loss": 0.9138, + "step": 1776 + }, + { + "epoch": 1.42, + "learning_rate": 0.0001431818181818182, + "loss": 0.8807, + "step": 1777 + }, + { + "epoch": 1.42, + "learning_rate": 0.00014314980793854034, + "loss": 0.922, + "step": 1778 + }, + { + "epoch": 1.42, + "learning_rate": 0.0001431177976952625, + "loss": 0.7861, + "step": 1779 + }, + { + "epoch": 1.42, + "learning_rate": 0.00014308578745198463, + "loss": 0.7813, + "step": 1780 + }, + { + "epoch": 1.42, + "learning_rate": 0.0001430537772087068, + "loss": 0.8319, + "step": 1781 + }, + { + "epoch": 1.43, + "learning_rate": 0.00014302176696542895, + "loss": 0.7703, + "step": 1782 + }, + { + "epoch": 1.43, + "learning_rate": 0.00014298975672215108, + "loss": 0.7735, + "step": 1783 + }, + { + "epoch": 1.43, + "learning_rate": 0.00014295774647887324, + "loss": 0.786, + "step": 1784 + }, + { + "epoch": 1.43, + "learning_rate": 0.0001429257362355954, + "loss": 0.8819, + "step": 1785 + }, + { + "epoch": 1.43, + "learning_rate": 0.00014289372599231753, + "loss": 0.8143, + "step": 1786 + }, + { + "epoch": 1.43, + "learning_rate": 0.0001428617157490397, + "loss": 0.8072, + "step": 1787 + }, + { + "epoch": 1.43, + "learning_rate": 0.00014282970550576185, + "loss": 0.7692, + "step": 1788 + }, + { + "epoch": 1.43, + "learning_rate": 0.00014279769526248401, + "loss": 0.7239, + "step": 1789 + }, + { + "epoch": 1.43, + "learning_rate": 0.00014276568501920615, + "loss": 0.7584, + "step": 1790 + }, + { + "epoch": 1.43, + "learning_rate": 0.0001427336747759283, + "loss": 0.7888, + "step": 1791 + }, + { + "epoch": 1.43, + "learning_rate": 0.00014270166453265047, + "loss": 0.8133, + "step": 1792 + }, + { + "epoch": 1.43, + "learning_rate": 0.0001426696542893726, + "loss": 0.8262, + "step": 1793 + }, + { + "epoch": 1.44, + "learning_rate": 0.00014263764404609476, + "loss": 0.6968, + "step": 1794 + }, + { + "epoch": 1.44, + "learning_rate": 0.00014260563380281692, + "loss": 0.7521, + "step": 1795 + }, + { + "epoch": 1.44, + "learning_rate": 0.00014257362355953905, + "loss": 0.6863, + "step": 1796 + }, + { + "epoch": 1.44, + "learning_rate": 0.0001425416133162612, + "loss": 0.7868, + "step": 1797 + }, + { + "epoch": 1.44, + "learning_rate": 0.00014250960307298334, + "loss": 0.7872, + "step": 1798 + }, + { + "epoch": 1.44, + "learning_rate": 0.00014247759282970553, + "loss": 0.8016, + "step": 1799 + }, + { + "epoch": 1.44, + "learning_rate": 0.00014244558258642766, + "loss": 0.8514, + "step": 1800 + }, + { + "epoch": 1.44, + "learning_rate": 0.0001424135723431498, + "loss": 0.8743, + "step": 1801 + }, + { + "epoch": 1.44, + "learning_rate": 0.00014238156209987196, + "loss": 0.7418, + "step": 1802 + }, + { + "epoch": 1.44, + "learning_rate": 0.00014234955185659412, + "loss": 0.854, + "step": 1803 + }, + { + "epoch": 1.44, + "learning_rate": 0.00014231754161331628, + "loss": 0.7746, + "step": 1804 + }, + { + "epoch": 1.44, + "learning_rate": 0.0001422855313700384, + "loss": 0.7321, + "step": 1805 + }, + { + "epoch": 1.44, + "learning_rate": 0.00014225352112676057, + "loss": 0.7725, + "step": 1806 + }, + { + "epoch": 1.45, + "learning_rate": 0.00014222151088348273, + "loss": 0.9379, + "step": 1807 + }, + { + "epoch": 1.45, + "learning_rate": 0.00014218950064020486, + "loss": 0.7166, + "step": 1808 + }, + { + "epoch": 1.45, + "learning_rate": 0.00014215749039692702, + "loss": 0.7212, + "step": 1809 + }, + { + "epoch": 1.45, + "learning_rate": 0.00014212548015364918, + "loss": 0.8487, + "step": 1810 + }, + { + "epoch": 1.45, + "learning_rate": 0.00014209346991037134, + "loss": 0.9052, + "step": 1811 + }, + { + "epoch": 1.45, + "learning_rate": 0.00014206145966709347, + "loss": 0.7333, + "step": 1812 + }, + { + "epoch": 1.45, + "learning_rate": 0.00014202944942381563, + "loss": 0.7515, + "step": 1813 + }, + { + "epoch": 1.45, + "learning_rate": 0.0001419974391805378, + "loss": 0.7751, + "step": 1814 + }, + { + "epoch": 1.45, + "learning_rate": 0.00014196542893725992, + "loss": 0.7935, + "step": 1815 + }, + { + "epoch": 1.45, + "learning_rate": 0.00014193341869398208, + "loss": 0.7858, + "step": 1816 + }, + { + "epoch": 1.45, + "learning_rate": 0.00014190140845070424, + "loss": 0.7857, + "step": 1817 + }, + { + "epoch": 1.45, + "learning_rate": 0.00014186939820742638, + "loss": 0.7668, + "step": 1818 + }, + { + "epoch": 1.46, + "learning_rate": 0.00014183738796414854, + "loss": 0.8822, + "step": 1819 + }, + { + "epoch": 1.46, + "learning_rate": 0.00014180537772087067, + "loss": 0.6709, + "step": 1820 + }, + { + "epoch": 1.46, + "learning_rate": 0.00014177336747759286, + "loss": 0.7848, + "step": 1821 + }, + { + "epoch": 1.46, + "learning_rate": 0.000141741357234315, + "loss": 0.6744, + "step": 1822 + }, + { + "epoch": 1.46, + "learning_rate": 0.00014170934699103712, + "loss": 0.8619, + "step": 1823 + }, + { + "epoch": 1.46, + "learning_rate": 0.00014167733674775928, + "loss": 0.7224, + "step": 1824 + }, + { + "epoch": 1.46, + "learning_rate": 0.00014164532650448144, + "loss": 0.7624, + "step": 1825 + }, + { + "epoch": 1.46, + "learning_rate": 0.0001416133162612036, + "loss": 0.8271, + "step": 1826 + }, + { + "epoch": 1.46, + "learning_rate": 0.00014158130601792573, + "loss": 0.9406, + "step": 1827 + }, + { + "epoch": 1.46, + "learning_rate": 0.0001415492957746479, + "loss": 0.8056, + "step": 1828 + }, + { + "epoch": 1.46, + "learning_rate": 0.00014151728553137005, + "loss": 0.8948, + "step": 1829 + }, + { + "epoch": 1.46, + "learning_rate": 0.00014148527528809219, + "loss": 0.7166, + "step": 1830 + }, + { + "epoch": 1.46, + "learning_rate": 0.00014145326504481435, + "loss": 0.7185, + "step": 1831 + }, + { + "epoch": 1.47, + "learning_rate": 0.0001414212548015365, + "loss": 0.8773, + "step": 1832 + }, + { + "epoch": 1.47, + "learning_rate": 0.00014138924455825867, + "loss": 0.891, + "step": 1833 + }, + { + "epoch": 1.47, + "learning_rate": 0.0001413572343149808, + "loss": 0.7602, + "step": 1834 + }, + { + "epoch": 1.47, + "learning_rate": 0.00014132522407170296, + "loss": 0.726, + "step": 1835 + }, + { + "epoch": 1.47, + "learning_rate": 0.00014129321382842512, + "loss": 0.6867, + "step": 1836 + }, + { + "epoch": 1.47, + "learning_rate": 0.00014126120358514725, + "loss": 0.7673, + "step": 1837 + }, + { + "epoch": 1.47, + "learning_rate": 0.0001412291933418694, + "loss": 0.8232, + "step": 1838 + }, + { + "epoch": 1.47, + "learning_rate": 0.00014119718309859157, + "loss": 0.8625, + "step": 1839 + }, + { + "epoch": 1.47, + "learning_rate": 0.0001411651728553137, + "loss": 0.7699, + "step": 1840 + }, + { + "epoch": 1.47, + "learning_rate": 0.00014113316261203586, + "loss": 0.9115, + "step": 1841 + }, + { + "epoch": 1.47, + "learning_rate": 0.000141101152368758, + "loss": 0.6825, + "step": 1842 + }, + { + "epoch": 1.47, + "learning_rate": 0.00014106914212548018, + "loss": 0.8604, + "step": 1843 + }, + { + "epoch": 1.48, + "learning_rate": 0.00014103713188220231, + "loss": 0.8227, + "step": 1844 + }, + { + "epoch": 1.48, + "learning_rate": 0.00014100512163892445, + "loss": 0.8292, + "step": 1845 + }, + { + "epoch": 1.48, + "learning_rate": 0.0001409731113956466, + "loss": 0.8363, + "step": 1846 + }, + { + "epoch": 1.48, + "learning_rate": 0.00014094110115236877, + "loss": 0.8236, + "step": 1847 + }, + { + "epoch": 1.48, + "learning_rate": 0.00014090909090909093, + "loss": 0.7956, + "step": 1848 + }, + { + "epoch": 1.48, + "learning_rate": 0.00014087708066581306, + "loss": 0.708, + "step": 1849 + }, + { + "epoch": 1.48, + "learning_rate": 0.00014084507042253522, + "loss": 0.8345, + "step": 1850 + }, + { + "epoch": 1.48, + "learning_rate": 0.00014081306017925738, + "loss": 0.8184, + "step": 1851 + }, + { + "epoch": 1.48, + "learning_rate": 0.0001407810499359795, + "loss": 0.7525, + "step": 1852 + }, + { + "epoch": 1.48, + "learning_rate": 0.00014074903969270167, + "loss": 0.7258, + "step": 1853 + }, + { + "epoch": 1.48, + "learning_rate": 0.00014071702944942383, + "loss": 0.7488, + "step": 1854 + }, + { + "epoch": 1.48, + "learning_rate": 0.00014068501920614596, + "loss": 0.9129, + "step": 1855 + }, + { + "epoch": 1.48, + "learning_rate": 0.00014065300896286812, + "loss": 0.9012, + "step": 1856 + }, + { + "epoch": 1.49, + "learning_rate": 0.00014062099871959026, + "loss": 0.8658, + "step": 1857 + }, + { + "epoch": 1.49, + "learning_rate": 0.00014058898847631244, + "loss": 0.8366, + "step": 1858 + }, + { + "epoch": 1.49, + "learning_rate": 0.00014055697823303458, + "loss": 0.9142, + "step": 1859 + }, + { + "epoch": 1.49, + "learning_rate": 0.00014052496798975674, + "loss": 0.7863, + "step": 1860 + }, + { + "epoch": 1.49, + "learning_rate": 0.00014049295774647887, + "loss": 0.7065, + "step": 1861 + }, + { + "epoch": 1.49, + "learning_rate": 0.00014046094750320103, + "loss": 0.7804, + "step": 1862 + }, + { + "epoch": 1.49, + "learning_rate": 0.0001404289372599232, + "loss": 0.823, + "step": 1863 + }, + { + "epoch": 1.49, + "learning_rate": 0.00014039692701664532, + "loss": 0.8132, + "step": 1864 + }, + { + "epoch": 1.49, + "learning_rate": 0.0001403649167733675, + "loss": 0.8052, + "step": 1865 + }, + { + "epoch": 1.49, + "learning_rate": 0.00014033290653008964, + "loss": 0.9199, + "step": 1866 + }, + { + "epoch": 1.49, + "learning_rate": 0.00014030089628681177, + "loss": 0.8074, + "step": 1867 + }, + { + "epoch": 1.49, + "learning_rate": 0.00014026888604353393, + "loss": 0.8003, + "step": 1868 + }, + { + "epoch": 1.5, + "learning_rate": 0.0001402368758002561, + "loss": 0.7303, + "step": 1869 + }, + { + "epoch": 1.5, + "learning_rate": 0.00014020486555697825, + "loss": 0.847, + "step": 1870 + }, + { + "epoch": 1.5, + "learning_rate": 0.00014017285531370038, + "loss": 0.8361, + "step": 1871 + }, + { + "epoch": 1.5, + "learning_rate": 0.00014014084507042254, + "loss": 0.8121, + "step": 1872 + }, + { + "epoch": 1.5, + "learning_rate": 0.0001401088348271447, + "loss": 0.8575, + "step": 1873 + }, + { + "epoch": 1.5, + "learning_rate": 0.00014007682458386684, + "loss": 0.7784, + "step": 1874 + }, + { + "epoch": 1.5, + "learning_rate": 0.000140044814340589, + "loss": 0.7621, + "step": 1875 + }, + { + "epoch": 1.5, + "learning_rate": 0.00014001280409731116, + "loss": 0.754, + "step": 1876 + }, + { + "epoch": 1.5, + "learning_rate": 0.0001399807938540333, + "loss": 0.8696, + "step": 1877 + }, + { + "epoch": 1.5, + "learning_rate": 0.00013994878361075545, + "loss": 0.7023, + "step": 1878 + }, + { + "epoch": 1.5, + "learning_rate": 0.00013991677336747758, + "loss": 0.7708, + "step": 1879 + }, + { + "epoch": 1.5, + "learning_rate": 0.00013988476312419977, + "loss": 0.8324, + "step": 1880 + }, + { + "epoch": 1.5, + "learning_rate": 0.0001398527528809219, + "loss": 0.7259, + "step": 1881 + }, + { + "epoch": 1.51, + "learning_rate": 0.00013982074263764403, + "loss": 0.8394, + "step": 1882 + }, + { + "epoch": 1.51, + "learning_rate": 0.0001397887323943662, + "loss": 0.7512, + "step": 1883 + }, + { + "epoch": 1.51, + "learning_rate": 0.00013975672215108835, + "loss": 0.8402, + "step": 1884 + }, + { + "epoch": 1.51, + "learning_rate": 0.0001397247119078105, + "loss": 0.8124, + "step": 1885 + }, + { + "epoch": 1.51, + "learning_rate": 0.00013969270166453265, + "loss": 0.7244, + "step": 1886 + }, + { + "epoch": 1.51, + "learning_rate": 0.0001396606914212548, + "loss": 0.8578, + "step": 1887 + }, + { + "epoch": 1.51, + "learning_rate": 0.00013962868117797697, + "loss": 0.8294, + "step": 1888 + }, + { + "epoch": 1.51, + "learning_rate": 0.0001395966709346991, + "loss": 0.7087, + "step": 1889 + }, + { + "epoch": 1.51, + "learning_rate": 0.00013956466069142126, + "loss": 0.831, + "step": 1890 + }, + { + "epoch": 1.51, + "learning_rate": 0.00013953265044814342, + "loss": 0.8128, + "step": 1891 + }, + { + "epoch": 1.51, + "learning_rate": 0.00013950064020486558, + "loss": 0.726, + "step": 1892 + }, + { + "epoch": 1.51, + "learning_rate": 0.0001394686299615877, + "loss": 0.7587, + "step": 1893 + }, + { + "epoch": 1.52, + "learning_rate": 0.00013943661971830987, + "loss": 0.8507, + "step": 1894 + }, + { + "epoch": 1.52, + "learning_rate": 0.00013940460947503203, + "loss": 0.679, + "step": 1895 + }, + { + "epoch": 1.52, + "learning_rate": 0.00013937259923175416, + "loss": 0.8174, + "step": 1896 + }, + { + "epoch": 1.52, + "learning_rate": 0.00013934058898847632, + "loss": 0.8223, + "step": 1897 + }, + { + "epoch": 1.52, + "learning_rate": 0.00013930857874519848, + "loss": 0.752, + "step": 1898 + }, + { + "epoch": 1.52, + "learning_rate": 0.00013927656850192061, + "loss": 0.8452, + "step": 1899 + }, + { + "epoch": 1.52, + "learning_rate": 0.00013924455825864277, + "loss": 0.8213, + "step": 1900 + }, + { + "epoch": 1.52, + "learning_rate": 0.0001392125480153649, + "loss": 0.7748, + "step": 1901 + }, + { + "epoch": 1.52, + "learning_rate": 0.0001391805377720871, + "loss": 0.8694, + "step": 1902 + }, + { + "epoch": 1.52, + "learning_rate": 0.00013914852752880923, + "loss": 0.7582, + "step": 1903 + }, + { + "epoch": 1.52, + "learning_rate": 0.00013911651728553136, + "loss": 0.8288, + "step": 1904 + }, + { + "epoch": 1.52, + "learning_rate": 0.00013908450704225352, + "loss": 0.7937, + "step": 1905 + }, + { + "epoch": 1.52, + "learning_rate": 0.00013905249679897568, + "loss": 0.7749, + "step": 1906 + }, + { + "epoch": 1.53, + "learning_rate": 0.00013902048655569784, + "loss": 0.7771, + "step": 1907 + }, + { + "epoch": 1.53, + "learning_rate": 0.00013898847631241997, + "loss": 0.8423, + "step": 1908 + }, + { + "epoch": 1.53, + "learning_rate": 0.00013895646606914213, + "loss": 0.9121, + "step": 1909 + }, + { + "epoch": 1.53, + "learning_rate": 0.0001389244558258643, + "loss": 0.7859, + "step": 1910 + }, + { + "epoch": 1.53, + "learning_rate": 0.00013889244558258642, + "loss": 0.9535, + "step": 1911 + }, + { + "epoch": 1.53, + "learning_rate": 0.00013886043533930858, + "loss": 0.6877, + "step": 1912 + }, + { + "epoch": 1.53, + "learning_rate": 0.00013882842509603074, + "loss": 0.9029, + "step": 1913 + }, + { + "epoch": 1.53, + "learning_rate": 0.0001387964148527529, + "loss": 0.8638, + "step": 1914 + }, + { + "epoch": 1.53, + "learning_rate": 0.00013876440460947504, + "loss": 0.7333, + "step": 1915 + }, + { + "epoch": 1.53, + "learning_rate": 0.0001387323943661972, + "loss": 0.7361, + "step": 1916 + }, + { + "epoch": 1.53, + "learning_rate": 0.00013870038412291936, + "loss": 0.8098, + "step": 1917 + }, + { + "epoch": 1.53, + "learning_rate": 0.0001386683738796415, + "loss": 0.9054, + "step": 1918 + }, + { + "epoch": 1.54, + "learning_rate": 0.00013863636363636365, + "loss": 0.8737, + "step": 1919 + }, + { + "epoch": 1.54, + "learning_rate": 0.0001386043533930858, + "loss": 0.9087, + "step": 1920 + }, + { + "epoch": 1.54, + "learning_rate": 0.00013857234314980794, + "loss": 0.6999, + "step": 1921 + }, + { + "epoch": 1.54, + "learning_rate": 0.0001385403329065301, + "loss": 0.6442, + "step": 1922 + }, + { + "epoch": 1.54, + "learning_rate": 0.00013850832266325223, + "loss": 0.6952, + "step": 1923 + }, + { + "epoch": 1.54, + "learning_rate": 0.00013847631241997442, + "loss": 0.9183, + "step": 1924 + }, + { + "epoch": 1.54, + "learning_rate": 0.00013844430217669655, + "loss": 0.694, + "step": 1925 + }, + { + "epoch": 1.54, + "learning_rate": 0.00013841229193341868, + "loss": 0.7194, + "step": 1926 + }, + { + "epoch": 1.54, + "learning_rate": 0.00013838028169014084, + "loss": 0.8059, + "step": 1927 + }, + { + "epoch": 1.54, + "learning_rate": 0.000138348271446863, + "loss": 0.7858, + "step": 1928 + }, + { + "epoch": 1.54, + "learning_rate": 0.00013831626120358516, + "loss": 0.8135, + "step": 1929 + }, + { + "epoch": 1.54, + "learning_rate": 0.0001382842509603073, + "loss": 0.8308, + "step": 1930 + }, + { + "epoch": 1.54, + "learning_rate": 0.00013825224071702946, + "loss": 0.8387, + "step": 1931 + }, + { + "epoch": 1.55, + "learning_rate": 0.00013822023047375162, + "loss": 0.7318, + "step": 1932 + }, + { + "epoch": 1.55, + "learning_rate": 0.00013818822023047375, + "loss": 0.7419, + "step": 1933 + }, + { + "epoch": 1.55, + "learning_rate": 0.0001381562099871959, + "loss": 0.8114, + "step": 1934 + }, + { + "epoch": 1.55, + "learning_rate": 0.00013812419974391807, + "loss": 0.6677, + "step": 1935 + }, + { + "epoch": 1.55, + "learning_rate": 0.0001380921895006402, + "loss": 0.7079, + "step": 1936 + }, + { + "epoch": 1.55, + "learning_rate": 0.00013806017925736236, + "loss": 0.892, + "step": 1937 + }, + { + "epoch": 1.55, + "learning_rate": 0.00013802816901408452, + "loss": 0.7486, + "step": 1938 + }, + { + "epoch": 1.55, + "learning_rate": 0.00013799615877080668, + "loss": 0.6975, + "step": 1939 + }, + { + "epoch": 1.55, + "learning_rate": 0.0001379641485275288, + "loss": 0.6812, + "step": 1940 + }, + { + "epoch": 1.55, + "learning_rate": 0.00013793213828425097, + "loss": 0.8656, + "step": 1941 + }, + { + "epoch": 1.55, + "learning_rate": 0.00013790012804097313, + "loss": 0.7954, + "step": 1942 + }, + { + "epoch": 1.55, + "learning_rate": 0.00013786811779769527, + "loss": 0.8582, + "step": 1943 + }, + { + "epoch": 1.56, + "learning_rate": 0.00013783610755441743, + "loss": 0.9114, + "step": 1944 + }, + { + "epoch": 1.56, + "learning_rate": 0.00013780409731113956, + "loss": 0.8069, + "step": 1945 + }, + { + "epoch": 1.56, + "learning_rate": 0.00013777208706786174, + "loss": 0.9102, + "step": 1946 + }, + { + "epoch": 1.56, + "learning_rate": 0.00013774007682458388, + "loss": 0.7862, + "step": 1947 + }, + { + "epoch": 1.56, + "learning_rate": 0.000137708066581306, + "loss": 0.9053, + "step": 1948 + }, + { + "epoch": 1.56, + "learning_rate": 0.00013767605633802817, + "loss": 0.6623, + "step": 1949 + }, + { + "epoch": 1.56, + "learning_rate": 0.00013764404609475033, + "loss": 0.8222, + "step": 1950 + }, + { + "epoch": 1.56, + "learning_rate": 0.0001376120358514725, + "loss": 0.8463, + "step": 1951 + }, + { + "epoch": 1.56, + "learning_rate": 0.00013758002560819462, + "loss": 0.8529, + "step": 1952 + }, + { + "epoch": 1.56, + "learning_rate": 0.00013754801536491678, + "loss": 0.7915, + "step": 1953 + }, + { + "epoch": 1.56, + "learning_rate": 0.00013751600512163894, + "loss": 0.7528, + "step": 1954 + }, + { + "epoch": 1.56, + "learning_rate": 0.00013748399487836107, + "loss": 0.8282, + "step": 1955 + }, + { + "epoch": 1.56, + "learning_rate": 0.00013745198463508323, + "loss": 0.7087, + "step": 1956 + }, + { + "epoch": 1.57, + "learning_rate": 0.0001374199743918054, + "loss": 0.8415, + "step": 1957 + }, + { + "epoch": 1.57, + "learning_rate": 0.00013738796414852753, + "loss": 0.8534, + "step": 1958 + }, + { + "epoch": 1.57, + "learning_rate": 0.0001373559539052497, + "loss": 0.8408, + "step": 1959 + }, + { + "epoch": 1.57, + "learning_rate": 0.00013732394366197182, + "loss": 0.815, + "step": 1960 + }, + { + "epoch": 1.57, + "learning_rate": 0.000137291933418694, + "loss": 0.8512, + "step": 1961 + }, + { + "epoch": 1.57, + "learning_rate": 0.00013725992317541614, + "loss": 0.9043, + "step": 1962 + }, + { + "epoch": 1.57, + "learning_rate": 0.00013722791293213827, + "loss": 0.8152, + "step": 1963 + }, + { + "epoch": 1.57, + "learning_rate": 0.00013719590268886043, + "loss": 0.7459, + "step": 1964 + }, + { + "epoch": 1.57, + "learning_rate": 0.0001371638924455826, + "loss": 0.8359, + "step": 1965 + }, + { + "epoch": 1.57, + "learning_rate": 0.00013713188220230475, + "loss": 0.6972, + "step": 1966 + }, + { + "epoch": 1.57, + "learning_rate": 0.00013709987195902688, + "loss": 0.9094, + "step": 1967 + }, + { + "epoch": 1.57, + "learning_rate": 0.00013706786171574904, + "loss": 0.7326, + "step": 1968 + }, + { + "epoch": 1.58, + "learning_rate": 0.0001370358514724712, + "loss": 0.7704, + "step": 1969 + }, + { + "epoch": 1.58, + "learning_rate": 0.00013700384122919334, + "loss": 0.8843, + "step": 1970 + }, + { + "epoch": 1.58, + "learning_rate": 0.0001369718309859155, + "loss": 0.8769, + "step": 1971 + }, + { + "epoch": 1.58, + "learning_rate": 0.00013693982074263766, + "loss": 0.7074, + "step": 1972 + }, + { + "epoch": 1.58, + "learning_rate": 0.00013690781049935982, + "loss": 0.6944, + "step": 1973 + }, + { + "epoch": 1.58, + "learning_rate": 0.00013687580025608195, + "loss": 0.8209, + "step": 1974 + }, + { + "epoch": 1.58, + "learning_rate": 0.0001368437900128041, + "loss": 0.7799, + "step": 1975 + }, + { + "epoch": 1.58, + "learning_rate": 0.00013681177976952627, + "loss": 0.7663, + "step": 1976 + }, + { + "epoch": 1.58, + "learning_rate": 0.0001367797695262484, + "loss": 0.8629, + "step": 1977 + }, + { + "epoch": 1.58, + "learning_rate": 0.00013674775928297056, + "loss": 0.7364, + "step": 1978 + }, + { + "epoch": 1.58, + "learning_rate": 0.00013671574903969272, + "loss": 0.699, + "step": 1979 + }, + { + "epoch": 1.58, + "learning_rate": 0.00013668373879641485, + "loss": 0.7545, + "step": 1980 + }, + { + "epoch": 1.58, + "learning_rate": 0.000136651728553137, + "loss": 0.7783, + "step": 1981 + }, + { + "epoch": 1.59, + "learning_rate": 0.00013661971830985914, + "loss": 0.8319, + "step": 1982 + }, + { + "epoch": 1.59, + "learning_rate": 0.00013658770806658133, + "loss": 0.7703, + "step": 1983 + }, + { + "epoch": 1.59, + "learning_rate": 0.00013655569782330346, + "loss": 0.9258, + "step": 1984 + }, + { + "epoch": 1.59, + "learning_rate": 0.0001365236875800256, + "loss": 0.8609, + "step": 1985 + }, + { + "epoch": 1.59, + "learning_rate": 0.00013649167733674776, + "loss": 0.8083, + "step": 1986 + }, + { + "epoch": 1.59, + "learning_rate": 0.00013645966709346992, + "loss": 0.8553, + "step": 1987 + }, + { + "epoch": 1.59, + "learning_rate": 0.00013642765685019208, + "loss": 0.7923, + "step": 1988 + }, + { + "epoch": 1.59, + "learning_rate": 0.0001363956466069142, + "loss": 0.823, + "step": 1989 + }, + { + "epoch": 1.59, + "learning_rate": 0.00013636363636363637, + "loss": 0.7516, + "step": 1990 + }, + { + "epoch": 1.59, + "learning_rate": 0.00013633162612035853, + "loss": 0.7102, + "step": 1991 + }, + { + "epoch": 1.59, + "learning_rate": 0.00013629961587708066, + "loss": 0.7908, + "step": 1992 + }, + { + "epoch": 1.59, + "learning_rate": 0.00013626760563380282, + "loss": 0.7762, + "step": 1993 + }, + { + "epoch": 1.6, + "learning_rate": 0.00013623559539052498, + "loss": 0.8503, + "step": 1994 + }, + { + "epoch": 1.6, + "learning_rate": 0.00013620358514724711, + "loss": 0.7719, + "step": 1995 + }, + { + "epoch": 1.6, + "learning_rate": 0.00013617157490396927, + "loss": 0.7436, + "step": 1996 + }, + { + "epoch": 1.6, + "learning_rate": 0.00013613956466069143, + "loss": 0.7638, + "step": 1997 + }, + { + "epoch": 1.6, + "learning_rate": 0.0001361075544174136, + "loss": 0.7241, + "step": 1998 + }, + { + "epoch": 1.6, + "learning_rate": 0.00013607554417413573, + "loss": 0.7975, + "step": 1999 + }, + { + "epoch": 1.6, + "learning_rate": 0.00013604353393085789, + "loss": 0.8595, + "step": 2000 + }, + { + "epoch": 1.6, + "learning_rate": 0.00013601152368758005, + "loss": 0.8379, + "step": 2001 + }, + { + "epoch": 1.6, + "learning_rate": 0.00013597951344430218, + "loss": 0.9449, + "step": 2002 + }, + { + "epoch": 1.6, + "learning_rate": 0.00013594750320102434, + "loss": 0.6544, + "step": 2003 + }, + { + "epoch": 1.6, + "learning_rate": 0.00013591549295774647, + "loss": 0.791, + "step": 2004 + }, + { + "epoch": 1.6, + "learning_rate": 0.00013588348271446866, + "loss": 0.9656, + "step": 2005 + }, + { + "epoch": 1.6, + "learning_rate": 0.0001358514724711908, + "loss": 0.7185, + "step": 2006 + }, + { + "epoch": 1.61, + "learning_rate": 0.00013581946222791292, + "loss": 0.8398, + "step": 2007 + }, + { + "epoch": 1.61, + "learning_rate": 0.00013578745198463508, + "loss": 0.8204, + "step": 2008 + }, + { + "epoch": 1.61, + "learning_rate": 0.00013575544174135724, + "loss": 0.8501, + "step": 2009 + }, + { + "epoch": 1.61, + "learning_rate": 0.0001357234314980794, + "loss": 0.7279, + "step": 2010 + }, + { + "epoch": 1.61, + "learning_rate": 0.00013569142125480153, + "loss": 0.6601, + "step": 2011 + }, + { + "epoch": 1.61, + "learning_rate": 0.0001356594110115237, + "loss": 0.9364, + "step": 2012 + }, + { + "epoch": 1.61, + "learning_rate": 0.00013562740076824585, + "loss": 0.7515, + "step": 2013 + }, + { + "epoch": 1.61, + "learning_rate": 0.000135595390524968, + "loss": 0.7803, + "step": 2014 + }, + { + "epoch": 1.61, + "learning_rate": 0.00013556338028169015, + "loss": 0.8684, + "step": 2015 + }, + { + "epoch": 1.61, + "learning_rate": 0.0001355313700384123, + "loss": 0.8614, + "step": 2016 + }, + { + "epoch": 1.61, + "learning_rate": 0.00013549935979513444, + "loss": 0.7589, + "step": 2017 + }, + { + "epoch": 1.61, + "learning_rate": 0.0001354673495518566, + "loss": 0.8162, + "step": 2018 + }, + { + "epoch": 1.62, + "learning_rate": 0.00013543533930857876, + "loss": 0.7767, + "step": 2019 + }, + { + "epoch": 1.62, + "learning_rate": 0.00013540332906530092, + "loss": 0.7627, + "step": 2020 + }, + { + "epoch": 1.62, + "learning_rate": 0.00013537131882202305, + "loss": 0.9271, + "step": 2021 + }, + { + "epoch": 1.62, + "learning_rate": 0.00013533930857874518, + "loss": 0.8937, + "step": 2022 + }, + { + "epoch": 1.62, + "learning_rate": 0.00013530729833546737, + "loss": 0.7259, + "step": 2023 + }, + { + "epoch": 1.62, + "learning_rate": 0.0001352752880921895, + "loss": 0.8774, + "step": 2024 + }, + { + "epoch": 1.62, + "learning_rate": 0.00013524327784891166, + "loss": 0.9098, + "step": 2025 + }, + { + "epoch": 1.62, + "learning_rate": 0.0001352112676056338, + "loss": 0.7975, + "step": 2026 + }, + { + "epoch": 1.62, + "learning_rate": 0.00013517925736235598, + "loss": 0.8001, + "step": 2027 + }, + { + "epoch": 1.62, + "learning_rate": 0.00013514724711907812, + "loss": 0.8083, + "step": 2028 + }, + { + "epoch": 1.62, + "learning_rate": 0.00013511523687580025, + "loss": 0.8397, + "step": 2029 + }, + { + "epoch": 1.62, + "learning_rate": 0.0001350832266325224, + "loss": 0.8172, + "step": 2030 + }, + { + "epoch": 1.62, + "learning_rate": 0.00013505121638924457, + "loss": 0.9286, + "step": 2031 + }, + { + "epoch": 1.63, + "learning_rate": 0.00013501920614596673, + "loss": 0.8163, + "step": 2032 + }, + { + "epoch": 1.63, + "learning_rate": 0.00013498719590268886, + "loss": 0.8016, + "step": 2033 + }, + { + "epoch": 1.63, + "learning_rate": 0.00013495518565941102, + "loss": 0.7472, + "step": 2034 + }, + { + "epoch": 1.63, + "learning_rate": 0.00013492317541613318, + "loss": 0.8486, + "step": 2035 + }, + { + "epoch": 1.63, + "learning_rate": 0.0001348911651728553, + "loss": 0.7617, + "step": 2036 + }, + { + "epoch": 1.63, + "learning_rate": 0.00013485915492957747, + "loss": 0.7298, + "step": 2037 + }, + { + "epoch": 1.63, + "learning_rate": 0.00013482714468629963, + "loss": 0.8364, + "step": 2038 + }, + { + "epoch": 1.63, + "learning_rate": 0.00013479513444302176, + "loss": 0.8083, + "step": 2039 + }, + { + "epoch": 1.63, + "learning_rate": 0.00013476312419974392, + "loss": 0.7807, + "step": 2040 + }, + { + "epoch": 1.63, + "learning_rate": 0.00013473111395646606, + "loss": 0.8154, + "step": 2041 + }, + { + "epoch": 1.63, + "learning_rate": 0.00013469910371318824, + "loss": 0.9473, + "step": 2042 + }, + { + "epoch": 1.63, + "learning_rate": 0.00013466709346991038, + "loss": 0.8027, + "step": 2043 + }, + { + "epoch": 1.64, + "learning_rate": 0.0001346350832266325, + "loss": 0.9493, + "step": 2044 + }, + { + "epoch": 1.64, + "learning_rate": 0.0001346030729833547, + "loss": 0.9331, + "step": 2045 + }, + { + "epoch": 1.64, + "learning_rate": 0.00013457106274007683, + "loss": 0.7347, + "step": 2046 + }, + { + "epoch": 1.64, + "learning_rate": 0.000134539052496799, + "loss": 0.8928, + "step": 2047 + }, + { + "epoch": 1.64, + "learning_rate": 0.00013450704225352112, + "loss": 0.9055, + "step": 2048 + }, + { + "epoch": 1.64, + "learning_rate": 0.00013447503201024328, + "loss": 0.7776, + "step": 2049 + }, + { + "epoch": 1.64, + "learning_rate": 0.00013444302176696544, + "loss": 0.9204, + "step": 2050 + }, + { + "epoch": 1.64, + "learning_rate": 0.00013441101152368757, + "loss": 0.7964, + "step": 2051 + }, + { + "epoch": 1.64, + "learning_rate": 0.00013437900128040973, + "loss": 0.7855, + "step": 2052 + }, + { + "epoch": 1.64, + "learning_rate": 0.0001343469910371319, + "loss": 0.8286, + "step": 2053 + }, + { + "epoch": 1.64, + "learning_rate": 0.00013431498079385405, + "loss": 0.8406, + "step": 2054 + }, + { + "epoch": 1.64, + "learning_rate": 0.00013428297055057619, + "loss": 0.7564, + "step": 2055 + }, + { + "epoch": 1.64, + "learning_rate": 0.00013425096030729835, + "loss": 0.8321, + "step": 2056 + }, + { + "epoch": 1.65, + "learning_rate": 0.0001342189500640205, + "loss": 0.7934, + "step": 2057 + }, + { + "epoch": 1.65, + "learning_rate": 0.00013418693982074264, + "loss": 0.7673, + "step": 2058 + }, + { + "epoch": 1.65, + "learning_rate": 0.0001341549295774648, + "loss": 0.6509, + "step": 2059 + }, + { + "epoch": 1.65, + "learning_rate": 0.00013412291933418696, + "loss": 0.7446, + "step": 2060 + }, + { + "epoch": 1.65, + "learning_rate": 0.0001340909090909091, + "loss": 0.7802, + "step": 2061 + }, + { + "epoch": 1.65, + "learning_rate": 0.00013405889884763125, + "loss": 0.673, + "step": 2062 + }, + { + "epoch": 1.65, + "learning_rate": 0.00013402688860435338, + "loss": 0.7185, + "step": 2063 + }, + { + "epoch": 1.65, + "learning_rate": 0.00013399487836107557, + "loss": 0.7423, + "step": 2064 + }, + { + "epoch": 1.65, + "learning_rate": 0.0001339628681177977, + "loss": 0.7384, + "step": 2065 + }, + { + "epoch": 1.65, + "learning_rate": 0.00013393085787451983, + "loss": 0.7325, + "step": 2066 + }, + { + "epoch": 1.65, + "learning_rate": 0.000133898847631242, + "loss": 0.7808, + "step": 2067 + }, + { + "epoch": 1.65, + "learning_rate": 0.00013386683738796415, + "loss": 0.8697, + "step": 2068 + }, + { + "epoch": 1.66, + "learning_rate": 0.00013383482714468631, + "loss": 0.7318, + "step": 2069 + }, + { + "epoch": 1.66, + "learning_rate": 0.00013380281690140845, + "loss": 0.8843, + "step": 2070 + }, + { + "epoch": 1.66, + "learning_rate": 0.0001337708066581306, + "loss": 0.8745, + "step": 2071 + }, + { + "epoch": 1.66, + "learning_rate": 0.00013373879641485277, + "loss": 0.8322, + "step": 2072 + }, + { + "epoch": 1.66, + "learning_rate": 0.0001337067861715749, + "loss": 0.8294, + "step": 2073 + }, + { + "epoch": 1.66, + "learning_rate": 0.00013367477592829706, + "loss": 0.8241, + "step": 2074 + }, + { + "epoch": 1.66, + "learning_rate": 0.00013364276568501922, + "loss": 0.7071, + "step": 2075 + }, + { + "epoch": 1.66, + "learning_rate": 0.00013361075544174135, + "loss": 0.7174, + "step": 2076 + }, + { + "epoch": 1.66, + "learning_rate": 0.0001335787451984635, + "loss": 0.8647, + "step": 2077 + }, + { + "epoch": 1.66, + "learning_rate": 0.00013354673495518567, + "loss": 0.7391, + "step": 2078 + }, + { + "epoch": 1.66, + "learning_rate": 0.00013351472471190783, + "loss": 0.8611, + "step": 2079 + }, + { + "epoch": 1.66, + "learning_rate": 0.00013348271446862996, + "loss": 0.7397, + "step": 2080 + }, + { + "epoch": 1.66, + "learning_rate": 0.00013345070422535212, + "loss": 0.9228, + "step": 2081 + }, + { + "epoch": 1.67, + "learning_rate": 0.00013341869398207428, + "loss": 0.7624, + "step": 2082 + }, + { + "epoch": 1.67, + "learning_rate": 0.00013338668373879642, + "loss": 0.8612, + "step": 2083 + }, + { + "epoch": 1.67, + "learning_rate": 0.00013335467349551858, + "loss": 0.7967, + "step": 2084 + }, + { + "epoch": 1.67, + "learning_rate": 0.0001333226632522407, + "loss": 0.6998, + "step": 2085 + }, + { + "epoch": 1.67, + "learning_rate": 0.0001332906530089629, + "loss": 0.7196, + "step": 2086 + }, + { + "epoch": 1.67, + "learning_rate": 0.00013325864276568503, + "loss": 0.8389, + "step": 2087 + }, + { + "epoch": 1.67, + "learning_rate": 0.00013322663252240716, + "loss": 0.8659, + "step": 2088 + }, + { + "epoch": 1.67, + "learning_rate": 0.00013319462227912932, + "loss": 0.7824, + "step": 2089 + }, + { + "epoch": 1.67, + "learning_rate": 0.00013316261203585148, + "loss": 0.8423, + "step": 2090 + }, + { + "epoch": 1.67, + "learning_rate": 0.00013313060179257364, + "loss": 0.8072, + "step": 2091 + }, + { + "epoch": 1.67, + "learning_rate": 0.00013309859154929577, + "loss": 0.688, + "step": 2092 + }, + { + "epoch": 1.67, + "learning_rate": 0.00013306658130601793, + "loss": 0.816, + "step": 2093 + }, + { + "epoch": 1.68, + "learning_rate": 0.0001330345710627401, + "loss": 0.8681, + "step": 2094 + }, + { + "epoch": 1.68, + "learning_rate": 0.00013300256081946222, + "loss": 0.7747, + "step": 2095 + }, + { + "epoch": 1.68, + "learning_rate": 0.00013297055057618438, + "loss": 0.7212, + "step": 2096 + }, + { + "epoch": 1.68, + "learning_rate": 0.00013293854033290654, + "loss": 0.7292, + "step": 2097 + }, + { + "epoch": 1.68, + "learning_rate": 0.00013290653008962868, + "loss": 0.7687, + "step": 2098 + }, + { + "epoch": 1.68, + "learning_rate": 0.00013287451984635084, + "loss": 0.8876, + "step": 2099 + }, + { + "epoch": 1.68, + "learning_rate": 0.000132842509603073, + "loss": 0.7323, + "step": 2100 + }, + { + "epoch": 1.68, + "learning_rate": 0.00013281049935979516, + "loss": 0.7519, + "step": 2101 + }, + { + "epoch": 1.68, + "learning_rate": 0.0001327784891165173, + "loss": 0.8192, + "step": 2102 + }, + { + "epoch": 1.68, + "learning_rate": 0.00013274647887323942, + "loss": 0.7152, + "step": 2103 + }, + { + "epoch": 1.68, + "learning_rate": 0.0001327144686299616, + "loss": 0.8495, + "step": 2104 + }, + { + "epoch": 1.68, + "learning_rate": 0.00013268245838668374, + "loss": 0.7164, + "step": 2105 + }, + { + "epoch": 1.68, + "learning_rate": 0.0001326504481434059, + "loss": 0.7314, + "step": 2106 + }, + { + "epoch": 1.69, + "learning_rate": 0.00013261843790012803, + "loss": 1.0317, + "step": 2107 + }, + { + "epoch": 1.69, + "learning_rate": 0.00013258642765685022, + "loss": 0.7477, + "step": 2108 + }, + { + "epoch": 1.69, + "learning_rate": 0.00013255441741357235, + "loss": 0.6991, + "step": 2109 + }, + { + "epoch": 1.69, + "learning_rate": 0.00013252240717029449, + "loss": 0.7741, + "step": 2110 + }, + { + "epoch": 1.69, + "learning_rate": 0.00013249039692701665, + "loss": 0.7659, + "step": 2111 + }, + { + "epoch": 1.69, + "learning_rate": 0.0001324583866837388, + "loss": 0.8777, + "step": 2112 + }, + { + "epoch": 1.69, + "learning_rate": 0.00013242637644046097, + "loss": 0.7962, + "step": 2113 + }, + { + "epoch": 1.69, + "learning_rate": 0.0001323943661971831, + "loss": 0.6705, + "step": 2114 + }, + { + "epoch": 1.69, + "learning_rate": 0.00013236235595390526, + "loss": 0.8803, + "step": 2115 + }, + { + "epoch": 1.69, + "learning_rate": 0.00013233034571062742, + "loss": 0.7764, + "step": 2116 + }, + { + "epoch": 1.69, + "learning_rate": 0.00013229833546734955, + "loss": 0.7597, + "step": 2117 + }, + { + "epoch": 1.69, + "learning_rate": 0.0001322663252240717, + "loss": 0.8411, + "step": 2118 + }, + { + "epoch": 1.7, + "learning_rate": 0.00013223431498079387, + "loss": 0.7529, + "step": 2119 + }, + { + "epoch": 1.7, + "learning_rate": 0.000132202304737516, + "loss": 0.7424, + "step": 2120 + }, + { + "epoch": 1.7, + "learning_rate": 0.00013217029449423816, + "loss": 0.8081, + "step": 2121 + }, + { + "epoch": 1.7, + "learning_rate": 0.00013213828425096032, + "loss": 0.8407, + "step": 2122 + }, + { + "epoch": 1.7, + "learning_rate": 0.00013210627400768248, + "loss": 0.8361, + "step": 2123 + }, + { + "epoch": 1.7, + "learning_rate": 0.00013207426376440461, + "loss": 0.7547, + "step": 2124 + }, + { + "epoch": 1.7, + "learning_rate": 0.00013204225352112675, + "loss": 0.7599, + "step": 2125 + }, + { + "epoch": 1.7, + "learning_rate": 0.00013201024327784893, + "loss": 0.8283, + "step": 2126 + }, + { + "epoch": 1.7, + "learning_rate": 0.00013197823303457107, + "loss": 0.8298, + "step": 2127 + }, + { + "epoch": 1.7, + "learning_rate": 0.00013194622279129323, + "loss": 0.8707, + "step": 2128 + }, + { + "epoch": 1.7, + "learning_rate": 0.00013191421254801536, + "loss": 0.7072, + "step": 2129 + }, + { + "epoch": 1.7, + "learning_rate": 0.00013188220230473752, + "loss": 0.8107, + "step": 2130 + }, + { + "epoch": 1.7, + "learning_rate": 0.00013185019206145968, + "loss": 0.7704, + "step": 2131 + }, + { + "epoch": 1.71, + "learning_rate": 0.0001318181818181818, + "loss": 0.8061, + "step": 2132 + }, + { + "epoch": 1.71, + "learning_rate": 0.00013178617157490397, + "loss": 0.7352, + "step": 2133 + }, + { + "epoch": 1.71, + "learning_rate": 0.00013175416133162613, + "loss": 0.8896, + "step": 2134 + }, + { + "epoch": 1.71, + "learning_rate": 0.00013172215108834826, + "loss": 0.7622, + "step": 2135 + }, + { + "epoch": 1.71, + "learning_rate": 0.00013169014084507042, + "loss": 0.8314, + "step": 2136 + }, + { + "epoch": 1.71, + "learning_rate": 0.00013165813060179258, + "loss": 0.8009, + "step": 2137 + }, + { + "epoch": 1.71, + "learning_rate": 0.00013162612035851474, + "loss": 0.7935, + "step": 2138 + }, + { + "epoch": 1.71, + "learning_rate": 0.00013159411011523688, + "loss": 0.8486, + "step": 2139 + }, + { + "epoch": 1.71, + "learning_rate": 0.00013156209987195904, + "loss": 0.7717, + "step": 2140 + }, + { + "epoch": 1.71, + "learning_rate": 0.0001315300896286812, + "loss": 0.727, + "step": 2141 + }, + { + "epoch": 1.71, + "learning_rate": 0.00013149807938540333, + "loss": 0.774, + "step": 2142 + }, + { + "epoch": 1.71, + "learning_rate": 0.0001314660691421255, + "loss": 0.7797, + "step": 2143 + }, + { + "epoch": 1.72, + "learning_rate": 0.00013143405889884762, + "loss": 0.905, + "step": 2144 + }, + { + "epoch": 1.72, + "learning_rate": 0.0001314020486555698, + "loss": 0.8966, + "step": 2145 + }, + { + "epoch": 1.72, + "learning_rate": 0.00013137003841229194, + "loss": 0.7789, + "step": 2146 + }, + { + "epoch": 1.72, + "learning_rate": 0.00013133802816901407, + "loss": 0.7626, + "step": 2147 + }, + { + "epoch": 1.72, + "learning_rate": 0.00013130601792573626, + "loss": 0.7022, + "step": 2148 + }, + { + "epoch": 1.72, + "learning_rate": 0.0001312740076824584, + "loss": 0.8213, + "step": 2149 + }, + { + "epoch": 1.72, + "learning_rate": 0.00013124199743918055, + "loss": 0.6726, + "step": 2150 + }, + { + "epoch": 1.72, + "learning_rate": 0.00013120998719590268, + "loss": 0.8322, + "step": 2151 + }, + { + "epoch": 1.72, + "learning_rate": 0.00013117797695262484, + "loss": 0.8236, + "step": 2152 + }, + { + "epoch": 1.72, + "learning_rate": 0.000131145966709347, + "loss": 0.9294, + "step": 2153 + }, + { + "epoch": 1.72, + "learning_rate": 0.00013111395646606914, + "loss": 0.7224, + "step": 2154 + }, + { + "epoch": 1.72, + "learning_rate": 0.0001310819462227913, + "loss": 0.7275, + "step": 2155 + }, + { + "epoch": 1.72, + "learning_rate": 0.00013104993597951346, + "loss": 0.7875, + "step": 2156 + }, + { + "epoch": 1.73, + "learning_rate": 0.0001310179257362356, + "loss": 0.7809, + "step": 2157 + }, + { + "epoch": 1.73, + "learning_rate": 0.00013098591549295775, + "loss": 0.7978, + "step": 2158 + }, + { + "epoch": 1.73, + "learning_rate": 0.0001309539052496799, + "loss": 0.8105, + "step": 2159 + }, + { + "epoch": 1.73, + "learning_rate": 0.00013092189500640207, + "loss": 0.8092, + "step": 2160 + }, + { + "epoch": 1.73, + "learning_rate": 0.0001308898847631242, + "loss": 0.7096, + "step": 2161 + }, + { + "epoch": 1.73, + "learning_rate": 0.00013085787451984633, + "loss": 0.7163, + "step": 2162 + }, + { + "epoch": 1.73, + "learning_rate": 0.00013082586427656852, + "loss": 0.7948, + "step": 2163 + }, + { + "epoch": 1.73, + "learning_rate": 0.00013079385403329065, + "loss": 0.8346, + "step": 2164 + }, + { + "epoch": 1.73, + "learning_rate": 0.0001307618437900128, + "loss": 0.8237, + "step": 2165 + }, + { + "epoch": 1.73, + "learning_rate": 0.00013072983354673495, + "loss": 0.8551, + "step": 2166 + }, + { + "epoch": 1.73, + "learning_rate": 0.00013069782330345713, + "loss": 0.8693, + "step": 2167 + }, + { + "epoch": 1.73, + "learning_rate": 0.00013066581306017927, + "loss": 0.811, + "step": 2168 + }, + { + "epoch": 1.74, + "learning_rate": 0.0001306338028169014, + "loss": 0.6716, + "step": 2169 + }, + { + "epoch": 1.74, + "learning_rate": 0.00013060179257362356, + "loss": 0.7152, + "step": 2170 + }, + { + "epoch": 1.74, + "learning_rate": 0.00013056978233034572, + "loss": 0.7828, + "step": 2171 + }, + { + "epoch": 1.74, + "learning_rate": 0.00013053777208706788, + "loss": 0.832, + "step": 2172 + }, + { + "epoch": 1.74, + "learning_rate": 0.00013050576184379, + "loss": 0.8243, + "step": 2173 + }, + { + "epoch": 1.74, + "learning_rate": 0.00013047375160051217, + "loss": 0.7984, + "step": 2174 + }, + { + "epoch": 1.74, + "learning_rate": 0.00013044174135723433, + "loss": 0.9256, + "step": 2175 + }, + { + "epoch": 1.74, + "learning_rate": 0.00013040973111395646, + "loss": 0.7593, + "step": 2176 + }, + { + "epoch": 1.74, + "learning_rate": 0.00013037772087067862, + "loss": 0.6879, + "step": 2177 + }, + { + "epoch": 1.74, + "learning_rate": 0.00013034571062740078, + "loss": 0.68, + "step": 2178 + }, + { + "epoch": 1.74, + "learning_rate": 0.00013031370038412291, + "loss": 0.9258, + "step": 2179 + }, + { + "epoch": 1.74, + "learning_rate": 0.00013028169014084507, + "loss": 0.8539, + "step": 2180 + }, + { + "epoch": 1.74, + "learning_rate": 0.00013024967989756723, + "loss": 0.8874, + "step": 2181 + }, + { + "epoch": 1.75, + "learning_rate": 0.0001302176696542894, + "loss": 0.865, + "step": 2182 + }, + { + "epoch": 1.75, + "learning_rate": 0.00013018565941101153, + "loss": 0.8276, + "step": 2183 + }, + { + "epoch": 1.75, + "learning_rate": 0.00013015364916773366, + "loss": 0.79, + "step": 2184 + }, + { + "epoch": 1.75, + "learning_rate": 0.00013012163892445585, + "loss": 0.8691, + "step": 2185 + }, + { + "epoch": 1.75, + "learning_rate": 0.00013008962868117798, + "loss": 0.8191, + "step": 2186 + }, + { + "epoch": 1.75, + "learning_rate": 0.00013005761843790014, + "loss": 0.8892, + "step": 2187 + }, + { + "epoch": 1.75, + "learning_rate": 0.00013002560819462227, + "loss": 0.9245, + "step": 2188 + }, + { + "epoch": 1.75, + "learning_rate": 0.00012999359795134443, + "loss": 0.7298, + "step": 2189 + }, + { + "epoch": 1.75, + "learning_rate": 0.0001299615877080666, + "loss": 0.7226, + "step": 2190 + }, + { + "epoch": 1.75, + "learning_rate": 0.00012992957746478872, + "loss": 0.8049, + "step": 2191 + }, + { + "epoch": 1.75, + "learning_rate": 0.00012989756722151088, + "loss": 0.7365, + "step": 2192 + }, + { + "epoch": 1.75, + "learning_rate": 0.00012986555697823304, + "loss": 0.8101, + "step": 2193 + }, + { + "epoch": 1.76, + "learning_rate": 0.0001298335467349552, + "loss": 0.9158, + "step": 2194 + }, + { + "epoch": 1.76, + "learning_rate": 0.00012980153649167734, + "loss": 0.7195, + "step": 2195 + }, + { + "epoch": 1.76, + "learning_rate": 0.0001297695262483995, + "loss": 0.7619, + "step": 2196 + }, + { + "epoch": 1.76, + "learning_rate": 0.00012973751600512166, + "loss": 0.7061, + "step": 2197 + }, + { + "epoch": 1.76, + "learning_rate": 0.0001297055057618438, + "loss": 0.7237, + "step": 2198 + }, + { + "epoch": 1.76, + "learning_rate": 0.00012967349551856595, + "loss": 0.8493, + "step": 2199 + }, + { + "epoch": 1.76, + "learning_rate": 0.0001296414852752881, + "loss": 0.7741, + "step": 2200 + }, + { + "epoch": 1.76, + "learning_rate": 0.00012960947503201024, + "loss": 0.7358, + "step": 2201 + }, + { + "epoch": 1.76, + "learning_rate": 0.0001295774647887324, + "loss": 0.8846, + "step": 2202 + }, + { + "epoch": 1.76, + "learning_rate": 0.00012954545454545456, + "loss": 0.664, + "step": 2203 + }, + { + "epoch": 1.76, + "learning_rate": 0.00012951344430217672, + "loss": 0.8512, + "step": 2204 + }, + { + "epoch": 1.76, + "learning_rate": 0.00012948143405889885, + "loss": 0.7398, + "step": 2205 + }, + { + "epoch": 1.76, + "learning_rate": 0.00012944942381562099, + "loss": 0.888, + "step": 2206 + }, + { + "epoch": 1.77, + "learning_rate": 0.00012941741357234317, + "loss": 0.7014, + "step": 2207 + }, + { + "epoch": 1.77, + "learning_rate": 0.0001293854033290653, + "loss": 0.7836, + "step": 2208 + }, + { + "epoch": 1.77, + "learning_rate": 0.00012935339308578746, + "loss": 0.7568, + "step": 2209 + }, + { + "epoch": 1.77, + "learning_rate": 0.0001293213828425096, + "loss": 0.776, + "step": 2210 + }, + { + "epoch": 1.77, + "learning_rate": 0.00012928937259923176, + "loss": 0.761, + "step": 2211 + }, + { + "epoch": 1.77, + "learning_rate": 0.00012925736235595392, + "loss": 0.8871, + "step": 2212 + }, + { + "epoch": 1.77, + "learning_rate": 0.00012922535211267605, + "loss": 0.7431, + "step": 2213 + }, + { + "epoch": 1.77, + "learning_rate": 0.0001291933418693982, + "loss": 0.8047, + "step": 2214 + }, + { + "epoch": 1.77, + "learning_rate": 0.00012916133162612037, + "loss": 0.7397, + "step": 2215 + }, + { + "epoch": 1.77, + "learning_rate": 0.0001291293213828425, + "loss": 0.714, + "step": 2216 + }, + { + "epoch": 1.77, + "learning_rate": 0.00012909731113956466, + "loss": 0.7932, + "step": 2217 + }, + { + "epoch": 1.77, + "learning_rate": 0.00012906530089628682, + "loss": 0.7079, + "step": 2218 + }, + { + "epoch": 1.78, + "learning_rate": 0.00012903329065300898, + "loss": 0.7138, + "step": 2219 + }, + { + "epoch": 1.78, + "learning_rate": 0.00012900128040973111, + "loss": 0.8064, + "step": 2220 + }, + { + "epoch": 1.78, + "learning_rate": 0.00012896927016645327, + "loss": 0.9156, + "step": 2221 + }, + { + "epoch": 1.78, + "learning_rate": 0.00012893725992317543, + "loss": 0.8304, + "step": 2222 + }, + { + "epoch": 1.78, + "learning_rate": 0.00012890524967989757, + "loss": 0.6974, + "step": 2223 + }, + { + "epoch": 1.78, + "learning_rate": 0.00012887323943661973, + "loss": 0.7663, + "step": 2224 + }, + { + "epoch": 1.78, + "learning_rate": 0.00012884122919334189, + "loss": 0.8244, + "step": 2225 + }, + { + "epoch": 1.78, + "learning_rate": 0.00012880921895006405, + "loss": 0.8414, + "step": 2226 + }, + { + "epoch": 1.78, + "learning_rate": 0.00012877720870678618, + "loss": 0.7259, + "step": 2227 + }, + { + "epoch": 1.78, + "learning_rate": 0.0001287451984635083, + "loss": 0.7257, + "step": 2228 + }, + { + "epoch": 1.78, + "learning_rate": 0.0001287131882202305, + "loss": 0.7268, + "step": 2229 + }, + { + "epoch": 1.78, + "learning_rate": 0.00012868117797695263, + "loss": 0.76, + "step": 2230 + }, + { + "epoch": 1.78, + "learning_rate": 0.0001286491677336748, + "loss": 0.8708, + "step": 2231 + }, + { + "epoch": 1.79, + "learning_rate": 0.00012861715749039692, + "loss": 0.6455, + "step": 2232 + }, + { + "epoch": 1.79, + "learning_rate": 0.00012858514724711908, + "loss": 0.7873, + "step": 2233 + }, + { + "epoch": 1.79, + "learning_rate": 0.00012855313700384124, + "loss": 0.7654, + "step": 2234 + }, + { + "epoch": 1.79, + "learning_rate": 0.00012852112676056337, + "loss": 0.8234, + "step": 2235 + }, + { + "epoch": 1.79, + "learning_rate": 0.00012848911651728553, + "loss": 0.8136, + "step": 2236 + }, + { + "epoch": 1.79, + "learning_rate": 0.0001284571062740077, + "loss": 0.7151, + "step": 2237 + }, + { + "epoch": 1.79, + "learning_rate": 0.00012842509603072983, + "loss": 0.8137, + "step": 2238 + }, + { + "epoch": 1.79, + "learning_rate": 0.000128393085787452, + "loss": 0.82, + "step": 2239 + }, + { + "epoch": 1.79, + "learning_rate": 0.00012836107554417415, + "loss": 0.7529, + "step": 2240 + }, + { + "epoch": 1.79, + "learning_rate": 0.0001283290653008963, + "loss": 0.8577, + "step": 2241 + }, + { + "epoch": 1.79, + "learning_rate": 0.00012829705505761844, + "loss": 0.8473, + "step": 2242 + }, + { + "epoch": 1.79, + "learning_rate": 0.00012826504481434057, + "loss": 0.7634, + "step": 2243 + }, + { + "epoch": 1.8, + "learning_rate": 0.00012823303457106276, + "loss": 0.6804, + "step": 2244 + }, + { + "epoch": 1.8, + "learning_rate": 0.0001282010243277849, + "loss": 0.7933, + "step": 2245 + }, + { + "epoch": 1.8, + "learning_rate": 0.00012816901408450705, + "loss": 0.7828, + "step": 2246 + }, + { + "epoch": 1.8, + "learning_rate": 0.00012813700384122918, + "loss": 0.6968, + "step": 2247 + }, + { + "epoch": 1.8, + "learning_rate": 0.00012810499359795137, + "loss": 0.9029, + "step": 2248 + }, + { + "epoch": 1.8, + "learning_rate": 0.0001280729833546735, + "loss": 0.8473, + "step": 2249 + }, + { + "epoch": 1.8, + "learning_rate": 0.00012804097311139564, + "loss": 0.8141, + "step": 2250 + }, + { + "epoch": 1.8, + "learning_rate": 0.00012800896286811782, + "loss": 0.8096, + "step": 2251 + }, + { + "epoch": 1.8, + "learning_rate": 0.00012797695262483996, + "loss": 0.8011, + "step": 2252 + }, + { + "epoch": 1.8, + "learning_rate": 0.00012794494238156212, + "loss": 0.7916, + "step": 2253 + }, + { + "epoch": 1.8, + "learning_rate": 0.00012791293213828425, + "loss": 0.8641, + "step": 2254 + }, + { + "epoch": 1.8, + "learning_rate": 0.0001278809218950064, + "loss": 0.8875, + "step": 2255 + }, + { + "epoch": 1.8, + "learning_rate": 0.00012784891165172857, + "loss": 0.8044, + "step": 2256 + }, + { + "epoch": 1.81, + "learning_rate": 0.0001278169014084507, + "loss": 0.745, + "step": 2257 + }, + { + "epoch": 1.81, + "learning_rate": 0.00012778489116517286, + "loss": 0.7276, + "step": 2258 + }, + { + "epoch": 1.81, + "learning_rate": 0.00012775288092189502, + "loss": 0.7136, + "step": 2259 + }, + { + "epoch": 1.81, + "learning_rate": 0.00012772087067861715, + "loss": 0.7546, + "step": 2260 + }, + { + "epoch": 1.81, + "learning_rate": 0.0001276888604353393, + "loss": 0.7768, + "step": 2261 + }, + { + "epoch": 1.81, + "learning_rate": 0.00012765685019206147, + "loss": 0.8184, + "step": 2262 + }, + { + "epoch": 1.81, + "learning_rate": 0.00012762483994878363, + "loss": 0.8803, + "step": 2263 + }, + { + "epoch": 1.81, + "learning_rate": 0.00012759282970550576, + "loss": 0.6996, + "step": 2264 + }, + { + "epoch": 1.81, + "learning_rate": 0.0001275608194622279, + "loss": 0.8094, + "step": 2265 + }, + { + "epoch": 1.81, + "learning_rate": 0.00012752880921895008, + "loss": 0.8643, + "step": 2266 + }, + { + "epoch": 1.81, + "learning_rate": 0.00012749679897567222, + "loss": 0.8093, + "step": 2267 + }, + { + "epoch": 1.81, + "learning_rate": 0.00012746478873239438, + "loss": 0.7798, + "step": 2268 + }, + { + "epoch": 1.82, + "learning_rate": 0.0001274327784891165, + "loss": 0.805, + "step": 2269 + }, + { + "epoch": 1.82, + "learning_rate": 0.00012740076824583867, + "loss": 0.8661, + "step": 2270 + }, + { + "epoch": 1.82, + "learning_rate": 0.00012736875800256083, + "loss": 0.7384, + "step": 2271 + }, + { + "epoch": 1.82, + "learning_rate": 0.00012733674775928296, + "loss": 0.7012, + "step": 2272 + }, + { + "epoch": 1.82, + "learning_rate": 0.00012730473751600512, + "loss": 0.821, + "step": 2273 + }, + { + "epoch": 1.82, + "learning_rate": 0.00012727272727272728, + "loss": 0.7364, + "step": 2274 + }, + { + "epoch": 1.82, + "learning_rate": 0.00012724071702944941, + "loss": 0.7597, + "step": 2275 + }, + { + "epoch": 1.82, + "learning_rate": 0.00012720870678617157, + "loss": 0.8765, + "step": 2276 + }, + { + "epoch": 1.82, + "learning_rate": 0.00012717669654289373, + "loss": 0.8899, + "step": 2277 + }, + { + "epoch": 1.82, + "learning_rate": 0.0001271446862996159, + "loss": 0.9863, + "step": 2278 + }, + { + "epoch": 1.82, + "learning_rate": 0.00012711267605633803, + "loss": 0.7546, + "step": 2279 + }, + { + "epoch": 1.82, + "learning_rate": 0.00012708066581306019, + "loss": 0.8696, + "step": 2280 + }, + { + "epoch": 1.82, + "learning_rate": 0.00012704865556978235, + "loss": 0.8557, + "step": 2281 + }, + { + "epoch": 1.83, + "learning_rate": 0.00012701664532650448, + "loss": 0.6489, + "step": 2282 + }, + { + "epoch": 1.83, + "learning_rate": 0.00012698463508322664, + "loss": 0.7624, + "step": 2283 + }, + { + "epoch": 1.83, + "learning_rate": 0.0001269526248399488, + "loss": 0.7215, + "step": 2284 + }, + { + "epoch": 1.83, + "learning_rate": 0.00012692061459667096, + "loss": 0.7462, + "step": 2285 + }, + { + "epoch": 1.83, + "learning_rate": 0.0001268886043533931, + "loss": 0.741, + "step": 2286 + }, + { + "epoch": 1.83, + "learning_rate": 0.00012685659411011522, + "loss": 0.8578, + "step": 2287 + }, + { + "epoch": 1.83, + "learning_rate": 0.0001268245838668374, + "loss": 0.7616, + "step": 2288 + }, + { + "epoch": 1.83, + "learning_rate": 0.00012679257362355954, + "loss": 0.7315, + "step": 2289 + }, + { + "epoch": 1.83, + "learning_rate": 0.0001267605633802817, + "loss": 0.7028, + "step": 2290 + }, + { + "epoch": 1.83, + "learning_rate": 0.00012672855313700384, + "loss": 0.867, + "step": 2291 + }, + { + "epoch": 1.83, + "learning_rate": 0.000126696542893726, + "loss": 0.7759, + "step": 2292 + }, + { + "epoch": 1.83, + "learning_rate": 0.00012666453265044815, + "loss": 0.7721, + "step": 2293 + }, + { + "epoch": 1.84, + "learning_rate": 0.0001266325224071703, + "loss": 0.851, + "step": 2294 + }, + { + "epoch": 1.84, + "learning_rate": 0.00012660051216389245, + "loss": 0.8215, + "step": 2295 + }, + { + "epoch": 1.84, + "learning_rate": 0.0001265685019206146, + "loss": 0.8142, + "step": 2296 + }, + { + "epoch": 1.84, + "learning_rate": 0.00012653649167733674, + "loss": 0.8249, + "step": 2297 + }, + { + "epoch": 1.84, + "learning_rate": 0.0001265044814340589, + "loss": 0.8673, + "step": 2298 + }, + { + "epoch": 1.84, + "learning_rate": 0.00012647247119078106, + "loss": 0.7959, + "step": 2299 + }, + { + "epoch": 1.84, + "learning_rate": 0.00012644046094750322, + "loss": 0.7484, + "step": 2300 + }, + { + "epoch": 1.84, + "learning_rate": 0.00012640845070422535, + "loss": 0.7854, + "step": 2301 + }, + { + "epoch": 1.84, + "learning_rate": 0.0001263764404609475, + "loss": 0.8113, + "step": 2302 + }, + { + "epoch": 1.84, + "learning_rate": 0.00012634443021766967, + "loss": 0.858, + "step": 2303 + }, + { + "epoch": 1.84, + "learning_rate": 0.0001263124199743918, + "loss": 0.7071, + "step": 2304 + }, + { + "epoch": 1.84, + "learning_rate": 0.00012628040973111396, + "loss": 0.7705, + "step": 2305 + }, + { + "epoch": 1.84, + "learning_rate": 0.00012624839948783612, + "loss": 0.7816, + "step": 2306 + }, + { + "epoch": 1.85, + "learning_rate": 0.00012621638924455828, + "loss": 0.8441, + "step": 2307 + }, + { + "epoch": 1.85, + "learning_rate": 0.00012618437900128042, + "loss": 0.7452, + "step": 2308 + }, + { + "epoch": 1.85, + "learning_rate": 0.00012615236875800255, + "loss": 0.6828, + "step": 2309 + }, + { + "epoch": 1.85, + "learning_rate": 0.00012612035851472474, + "loss": 0.7328, + "step": 2310 + }, + { + "epoch": 1.85, + "learning_rate": 0.00012608834827144687, + "loss": 0.8251, + "step": 2311 + }, + { + "epoch": 1.85, + "learning_rate": 0.00012605633802816903, + "loss": 0.8271, + "step": 2312 + }, + { + "epoch": 1.85, + "learning_rate": 0.00012602432778489116, + "loss": 0.9255, + "step": 2313 + }, + { + "epoch": 1.85, + "learning_rate": 0.00012599231754161332, + "loss": 0.8365, + "step": 2314 + }, + { + "epoch": 1.85, + "learning_rate": 0.00012596030729833548, + "loss": 0.8354, + "step": 2315 + }, + { + "epoch": 1.85, + "learning_rate": 0.0001259282970550576, + "loss": 0.8308, + "step": 2316 + }, + { + "epoch": 1.85, + "learning_rate": 0.00012589628681177977, + "loss": 0.7735, + "step": 2317 + }, + { + "epoch": 1.85, + "learning_rate": 0.00012586427656850193, + "loss": 0.8198, + "step": 2318 + }, + { + "epoch": 1.86, + "learning_rate": 0.00012583226632522407, + "loss": 0.8061, + "step": 2319 + }, + { + "epoch": 1.86, + "learning_rate": 0.00012580025608194622, + "loss": 0.7423, + "step": 2320 + }, + { + "epoch": 1.86, + "learning_rate": 0.00012576824583866838, + "loss": 0.8387, + "step": 2321 + }, + { + "epoch": 1.86, + "learning_rate": 0.00012573623559539054, + "loss": 0.8104, + "step": 2322 + }, + { + "epoch": 1.86, + "learning_rate": 0.00012570422535211268, + "loss": 0.6895, + "step": 2323 + }, + { + "epoch": 1.86, + "learning_rate": 0.0001256722151088348, + "loss": 0.7788, + "step": 2324 + }, + { + "epoch": 1.86, + "learning_rate": 0.000125640204865557, + "loss": 0.6953, + "step": 2325 + }, + { + "epoch": 1.86, + "learning_rate": 0.00012560819462227913, + "loss": 0.8365, + "step": 2326 + }, + { + "epoch": 1.86, + "learning_rate": 0.0001255761843790013, + "loss": 0.6794, + "step": 2327 + }, + { + "epoch": 1.86, + "learning_rate": 0.00012554417413572345, + "loss": 0.7183, + "step": 2328 + }, + { + "epoch": 1.86, + "learning_rate": 0.00012551216389244558, + "loss": 0.7555, + "step": 2329 + }, + { + "epoch": 1.86, + "learning_rate": 0.00012548015364916774, + "loss": 0.6211, + "step": 2330 + }, + { + "epoch": 1.86, + "learning_rate": 0.00012544814340588987, + "loss": 0.7012, + "step": 2331 + }, + { + "epoch": 1.87, + "learning_rate": 0.00012541613316261206, + "loss": 0.8457, + "step": 2332 + }, + { + "epoch": 1.87, + "learning_rate": 0.0001253841229193342, + "loss": 0.7994, + "step": 2333 + }, + { + "epoch": 1.87, + "learning_rate": 0.00012535211267605635, + "loss": 0.8869, + "step": 2334 + }, + { + "epoch": 1.87, + "learning_rate": 0.00012532010243277849, + "loss": 0.7663, + "step": 2335 + }, + { + "epoch": 1.87, + "learning_rate": 0.00012528809218950065, + "loss": 0.7474, + "step": 2336 + }, + { + "epoch": 1.87, + "learning_rate": 0.0001252560819462228, + "loss": 0.7913, + "step": 2337 + }, + { + "epoch": 1.87, + "learning_rate": 0.00012522407170294494, + "loss": 0.716, + "step": 2338 + }, + { + "epoch": 1.87, + "learning_rate": 0.0001251920614596671, + "loss": 0.7733, + "step": 2339 + }, + { + "epoch": 1.87, + "learning_rate": 0.00012516005121638926, + "loss": 0.8193, + "step": 2340 + }, + { + "epoch": 1.87, + "learning_rate": 0.0001251280409731114, + "loss": 0.7387, + "step": 2341 + }, + { + "epoch": 1.87, + "learning_rate": 0.00012509603072983355, + "loss": 0.7716, + "step": 2342 + }, + { + "epoch": 1.87, + "learning_rate": 0.0001250640204865557, + "loss": 0.8527, + "step": 2343 + }, + { + "epoch": 1.88, + "learning_rate": 0.00012503201024327787, + "loss": 0.8887, + "step": 2344 + }, + { + "epoch": 1.88, + "learning_rate": 0.000125, + "loss": 0.8372, + "step": 2345 + }, + { + "epoch": 1.88, + "learning_rate": 0.00012496798975672214, + "loss": 0.7878, + "step": 2346 + }, + { + "epoch": 1.88, + "learning_rate": 0.00012493597951344432, + "loss": 0.8135, + "step": 2347 + }, + { + "epoch": 1.88, + "learning_rate": 0.00012490396927016645, + "loss": 0.7553, + "step": 2348 + }, + { + "epoch": 1.88, + "learning_rate": 0.00012487195902688861, + "loss": 0.7788, + "step": 2349 + }, + { + "epoch": 1.88, + "learning_rate": 0.00012483994878361075, + "loss": 0.702, + "step": 2350 + }, + { + "epoch": 1.88, + "learning_rate": 0.0001248079385403329, + "loss": 0.7777, + "step": 2351 + }, + { + "epoch": 1.88, + "learning_rate": 0.00012477592829705507, + "loss": 0.7782, + "step": 2352 + }, + { + "epoch": 1.88, + "learning_rate": 0.0001247439180537772, + "loss": 0.7064, + "step": 2353 + }, + { + "epoch": 1.88, + "learning_rate": 0.0001247119078104994, + "loss": 0.87, + "step": 2354 + }, + { + "epoch": 1.88, + "learning_rate": 0.00012467989756722152, + "loss": 0.7461, + "step": 2355 + }, + { + "epoch": 1.88, + "learning_rate": 0.00012464788732394365, + "loss": 0.7626, + "step": 2356 + }, + { + "epoch": 1.89, + "learning_rate": 0.0001246158770806658, + "loss": 0.8239, + "step": 2357 + }, + { + "epoch": 1.89, + "learning_rate": 0.00012458386683738797, + "loss": 0.8218, + "step": 2358 + }, + { + "epoch": 1.89, + "learning_rate": 0.00012455185659411013, + "loss": 0.8193, + "step": 2359 + }, + { + "epoch": 1.89, + "learning_rate": 0.00012451984635083226, + "loss": 0.7553, + "step": 2360 + }, + { + "epoch": 1.89, + "learning_rate": 0.00012448783610755442, + "loss": 0.8027, + "step": 2361 + }, + { + "epoch": 1.89, + "learning_rate": 0.00012445582586427658, + "loss": 0.742, + "step": 2362 + }, + { + "epoch": 1.89, + "learning_rate": 0.00012442381562099872, + "loss": 0.7981, + "step": 2363 + }, + { + "epoch": 1.89, + "learning_rate": 0.00012439180537772088, + "loss": 0.7316, + "step": 2364 + }, + { + "epoch": 1.89, + "learning_rate": 0.00012435979513444304, + "loss": 0.855, + "step": 2365 + }, + { + "epoch": 1.89, + "learning_rate": 0.0001243277848911652, + "loss": 0.8466, + "step": 2366 + }, + { + "epoch": 1.89, + "learning_rate": 0.00012429577464788733, + "loss": 0.8062, + "step": 2367 + }, + { + "epoch": 1.89, + "learning_rate": 0.00012426376440460946, + "loss": 0.828, + "step": 2368 + }, + { + "epoch": 1.9, + "learning_rate": 0.00012423175416133165, + "loss": 0.8328, + "step": 2369 + }, + { + "epoch": 1.9, + "learning_rate": 0.00012419974391805378, + "loss": 0.8228, + "step": 2370 + }, + { + "epoch": 1.9, + "learning_rate": 0.00012416773367477594, + "loss": 0.7368, + "step": 2371 + }, + { + "epoch": 1.9, + "learning_rate": 0.00012413572343149807, + "loss": 0.6311, + "step": 2372 + }, + { + "epoch": 1.9, + "learning_rate": 0.00012410371318822023, + "loss": 0.8687, + "step": 2373 + }, + { + "epoch": 1.9, + "learning_rate": 0.0001240717029449424, + "loss": 0.7835, + "step": 2374 + }, + { + "epoch": 1.9, + "learning_rate": 0.00012403969270166453, + "loss": 0.7978, + "step": 2375 + }, + { + "epoch": 1.9, + "learning_rate": 0.00012400768245838668, + "loss": 0.7309, + "step": 2376 + }, + { + "epoch": 1.9, + "learning_rate": 0.00012397567221510884, + "loss": 0.912, + "step": 2377 + }, + { + "epoch": 1.9, + "learning_rate": 0.00012394366197183098, + "loss": 0.8043, + "step": 2378 + }, + { + "epoch": 1.9, + "learning_rate": 0.00012391165172855314, + "loss": 0.7248, + "step": 2379 + }, + { + "epoch": 1.9, + "learning_rate": 0.0001238796414852753, + "loss": 0.7984, + "step": 2380 + }, + { + "epoch": 1.9, + "learning_rate": 0.00012384763124199746, + "loss": 0.6796, + "step": 2381 + }, + { + "epoch": 1.91, + "learning_rate": 0.0001238156209987196, + "loss": 0.7677, + "step": 2382 + }, + { + "epoch": 1.91, + "learning_rate": 0.00012378361075544175, + "loss": 0.6908, + "step": 2383 + }, + { + "epoch": 1.91, + "learning_rate": 0.0001237516005121639, + "loss": 0.7271, + "step": 2384 + }, + { + "epoch": 1.91, + "learning_rate": 0.00012371959026888604, + "loss": 0.8619, + "step": 2385 + }, + { + "epoch": 1.91, + "learning_rate": 0.0001236875800256082, + "loss": 0.8978, + "step": 2386 + }, + { + "epoch": 1.91, + "learning_rate": 0.00012365556978233036, + "loss": 0.8246, + "step": 2387 + }, + { + "epoch": 1.91, + "learning_rate": 0.00012362355953905252, + "loss": 0.77, + "step": 2388 + }, + { + "epoch": 1.91, + "learning_rate": 0.00012359154929577465, + "loss": 0.8321, + "step": 2389 + }, + { + "epoch": 1.91, + "learning_rate": 0.00012355953905249679, + "loss": 0.8505, + "step": 2390 + }, + { + "epoch": 1.91, + "learning_rate": 0.00012352752880921897, + "loss": 0.8258, + "step": 2391 + }, + { + "epoch": 1.91, + "learning_rate": 0.0001234955185659411, + "loss": 0.7932, + "step": 2392 + }, + { + "epoch": 1.91, + "learning_rate": 0.00012346350832266327, + "loss": 0.8022, + "step": 2393 + }, + { + "epoch": 1.92, + "learning_rate": 0.0001234314980793854, + "loss": 0.8167, + "step": 2394 + }, + { + "epoch": 1.92, + "learning_rate": 0.00012339948783610756, + "loss": 0.9885, + "step": 2395 + }, + { + "epoch": 1.92, + "learning_rate": 0.00012336747759282972, + "loss": 0.8107, + "step": 2396 + }, + { + "epoch": 1.92, + "learning_rate": 0.00012333546734955185, + "loss": 0.834, + "step": 2397 + }, + { + "epoch": 1.92, + "learning_rate": 0.000123303457106274, + "loss": 0.8665, + "step": 2398 + }, + { + "epoch": 1.92, + "learning_rate": 0.00012327144686299617, + "loss": 0.8698, + "step": 2399 + }, + { + "epoch": 1.92, + "learning_rate": 0.0001232394366197183, + "loss": 0.6976, + "step": 2400 + }, + { + "epoch": 1.92, + "learning_rate": 0.00012320742637644046, + "loss": 0.6956, + "step": 2401 + }, + { + "epoch": 1.92, + "learning_rate": 0.00012317541613316262, + "loss": 0.7654, + "step": 2402 + }, + { + "epoch": 1.92, + "learning_rate": 0.00012314340588988478, + "loss": 0.79, + "step": 2403 + }, + { + "epoch": 1.92, + "learning_rate": 0.00012311139564660691, + "loss": 0.7098, + "step": 2404 + }, + { + "epoch": 1.92, + "learning_rate": 0.00012307938540332907, + "loss": 0.6704, + "step": 2405 + }, + { + "epoch": 1.92, + "learning_rate": 0.00012304737516005123, + "loss": 0.7923, + "step": 2406 + }, + { + "epoch": 1.93, + "learning_rate": 0.00012301536491677337, + "loss": 0.7044, + "step": 2407 + }, + { + "epoch": 1.93, + "learning_rate": 0.00012298335467349553, + "loss": 0.8322, + "step": 2408 + }, + { + "epoch": 1.93, + "learning_rate": 0.0001229513444302177, + "loss": 0.7624, + "step": 2409 + }, + { + "epoch": 1.93, + "learning_rate": 0.00012291933418693982, + "loss": 0.8066, + "step": 2410 + }, + { + "epoch": 1.93, + "learning_rate": 0.00012288732394366198, + "loss": 0.7718, + "step": 2411 + }, + { + "epoch": 1.93, + "learning_rate": 0.0001228553137003841, + "loss": 0.7995, + "step": 2412 + }, + { + "epoch": 1.93, + "learning_rate": 0.0001228233034571063, + "loss": 0.8573, + "step": 2413 + }, + { + "epoch": 1.93, + "learning_rate": 0.00012279129321382843, + "loss": 0.8743, + "step": 2414 + }, + { + "epoch": 1.93, + "learning_rate": 0.0001227592829705506, + "loss": 0.8752, + "step": 2415 + }, + { + "epoch": 1.93, + "learning_rate": 0.00012272727272727272, + "loss": 0.7974, + "step": 2416 + }, + { + "epoch": 1.93, + "learning_rate": 0.00012269526248399488, + "loss": 0.7705, + "step": 2417 + }, + { + "epoch": 1.93, + "learning_rate": 0.00012266325224071704, + "loss": 0.9263, + "step": 2418 + }, + { + "epoch": 1.94, + "learning_rate": 0.00012263124199743918, + "loss": 0.8065, + "step": 2419 + }, + { + "epoch": 1.94, + "learning_rate": 0.00012259923175416134, + "loss": 0.7873, + "step": 2420 + }, + { + "epoch": 1.94, + "learning_rate": 0.0001225672215108835, + "loss": 0.8183, + "step": 2421 + }, + { + "epoch": 1.94, + "learning_rate": 0.00012253521126760563, + "loss": 0.7773, + "step": 2422 + }, + { + "epoch": 1.94, + "learning_rate": 0.0001225032010243278, + "loss": 0.8035, + "step": 2423 + }, + { + "epoch": 1.94, + "learning_rate": 0.00012247119078104995, + "loss": 0.8513, + "step": 2424 + }, + { + "epoch": 1.94, + "learning_rate": 0.0001224391805377721, + "loss": 0.8112, + "step": 2425 + }, + { + "epoch": 1.94, + "learning_rate": 0.00012240717029449424, + "loss": 0.6508, + "step": 2426 + }, + { + "epoch": 1.94, + "learning_rate": 0.00012237516005121637, + "loss": 0.7103, + "step": 2427 + }, + { + "epoch": 1.94, + "learning_rate": 0.00012234314980793856, + "loss": 0.7747, + "step": 2428 + }, + { + "epoch": 1.94, + "learning_rate": 0.0001223111395646607, + "loss": 0.8929, + "step": 2429 + }, + { + "epoch": 1.94, + "learning_rate": 0.00012227912932138285, + "loss": 0.7981, + "step": 2430 + }, + { + "epoch": 1.94, + "learning_rate": 0.000122247119078105, + "loss": 0.8632, + "step": 2431 + }, + { + "epoch": 1.95, + "learning_rate": 0.00012221510883482714, + "loss": 0.9111, + "step": 2432 + }, + { + "epoch": 1.95, + "learning_rate": 0.0001221830985915493, + "loss": 0.7826, + "step": 2433 + }, + { + "epoch": 1.95, + "learning_rate": 0.00012215108834827144, + "loss": 0.8707, + "step": 2434 + }, + { + "epoch": 1.95, + "learning_rate": 0.00012211907810499362, + "loss": 0.7968, + "step": 2435 + }, + { + "epoch": 1.95, + "learning_rate": 0.00012208706786171576, + "loss": 0.7949, + "step": 2436 + }, + { + "epoch": 1.95, + "learning_rate": 0.00012205505761843789, + "loss": 0.8064, + "step": 2437 + }, + { + "epoch": 1.95, + "learning_rate": 0.00012202304737516006, + "loss": 0.7231, + "step": 2438 + }, + { + "epoch": 1.95, + "learning_rate": 0.0001219910371318822, + "loss": 0.7804, + "step": 2439 + }, + { + "epoch": 1.95, + "learning_rate": 0.00012195902688860437, + "loss": 0.7764, + "step": 2440 + }, + { + "epoch": 1.95, + "learning_rate": 0.0001219270166453265, + "loss": 0.6856, + "step": 2441 + }, + { + "epoch": 1.95, + "learning_rate": 0.00012189500640204865, + "loss": 0.8854, + "step": 2442 + }, + { + "epoch": 1.95, + "learning_rate": 0.00012186299615877082, + "loss": 0.8723, + "step": 2443 + }, + { + "epoch": 1.96, + "learning_rate": 0.00012183098591549295, + "loss": 0.7711, + "step": 2444 + }, + { + "epoch": 1.96, + "learning_rate": 0.00012179897567221513, + "loss": 0.7159, + "step": 2445 + }, + { + "epoch": 1.96, + "learning_rate": 0.00012176696542893726, + "loss": 0.7352, + "step": 2446 + }, + { + "epoch": 1.96, + "learning_rate": 0.00012173495518565943, + "loss": 0.672, + "step": 2447 + }, + { + "epoch": 1.96, + "learning_rate": 0.00012170294494238157, + "loss": 0.7927, + "step": 2448 + }, + { + "epoch": 1.96, + "learning_rate": 0.00012167093469910371, + "loss": 0.763, + "step": 2449 + }, + { + "epoch": 1.96, + "learning_rate": 0.00012163892445582587, + "loss": 0.7552, + "step": 2450 + }, + { + "epoch": 1.96, + "learning_rate": 0.00012160691421254802, + "loss": 0.7458, + "step": 2451 + }, + { + "epoch": 1.96, + "learning_rate": 0.00012157490396927018, + "loss": 0.8155, + "step": 2452 + }, + { + "epoch": 1.96, + "learning_rate": 0.00012154289372599232, + "loss": 0.8065, + "step": 2453 + }, + { + "epoch": 1.96, + "learning_rate": 0.00012151088348271447, + "loss": 0.8267, + "step": 2454 + }, + { + "epoch": 1.96, + "learning_rate": 0.00012147887323943663, + "loss": 0.6709, + "step": 2455 + }, + { + "epoch": 1.96, + "learning_rate": 0.00012144686299615878, + "loss": 0.8355, + "step": 2456 + }, + { + "epoch": 1.97, + "learning_rate": 0.00012141485275288094, + "loss": 0.7979, + "step": 2457 + }, + { + "epoch": 1.97, + "learning_rate": 0.00012138284250960308, + "loss": 0.809, + "step": 2458 + }, + { + "epoch": 1.97, + "learning_rate": 0.00012135083226632522, + "loss": 0.6368, + "step": 2459 + }, + { + "epoch": 1.97, + "learning_rate": 0.00012131882202304739, + "loss": 0.9048, + "step": 2460 + }, + { + "epoch": 1.97, + "learning_rate": 0.00012128681177976952, + "loss": 0.7964, + "step": 2461 + }, + { + "epoch": 1.97, + "learning_rate": 0.0001212548015364917, + "loss": 0.7142, + "step": 2462 + }, + { + "epoch": 1.97, + "learning_rate": 0.00012122279129321383, + "loss": 0.8145, + "step": 2463 + }, + { + "epoch": 1.97, + "learning_rate": 0.00012119078104993597, + "loss": 0.8478, + "step": 2464 + }, + { + "epoch": 1.97, + "learning_rate": 0.00012115877080665813, + "loss": 0.7651, + "step": 2465 + }, + { + "epoch": 1.97, + "learning_rate": 0.00012112676056338028, + "loss": 0.7467, + "step": 2466 + }, + { + "epoch": 1.97, + "learning_rate": 0.00012109475032010244, + "loss": 0.6697, + "step": 2467 + }, + { + "epoch": 1.97, + "learning_rate": 0.00012106274007682459, + "loss": 0.7564, + "step": 2468 + }, + { + "epoch": 1.98, + "learning_rate": 0.00012103072983354673, + "loss": 0.8616, + "step": 2469 + }, + { + "epoch": 1.98, + "learning_rate": 0.00012099871959026889, + "loss": 0.7635, + "step": 2470 + }, + { + "epoch": 1.98, + "learning_rate": 0.00012096670934699104, + "loss": 0.8381, + "step": 2471 + }, + { + "epoch": 1.98, + "learning_rate": 0.0001209346991037132, + "loss": 0.9119, + "step": 2472 + }, + { + "epoch": 1.98, + "learning_rate": 0.00012090268886043534, + "loss": 0.8192, + "step": 2473 + }, + { + "epoch": 1.98, + "learning_rate": 0.0001208706786171575, + "loss": 0.6943, + "step": 2474 + }, + { + "epoch": 1.98, + "learning_rate": 0.00012083866837387965, + "loss": 0.6984, + "step": 2475 + }, + { + "epoch": 1.98, + "learning_rate": 0.0001208066581306018, + "loss": 0.7814, + "step": 2476 + }, + { + "epoch": 1.98, + "learning_rate": 0.00012077464788732396, + "loss": 0.7123, + "step": 2477 + }, + { + "epoch": 1.98, + "learning_rate": 0.0001207426376440461, + "loss": 0.9142, + "step": 2478 + }, + { + "epoch": 1.98, + "learning_rate": 0.00012071062740076826, + "loss": 0.7761, + "step": 2479 + }, + { + "epoch": 1.98, + "learning_rate": 0.00012067861715749041, + "loss": 0.9016, + "step": 2480 + }, + { + "epoch": 1.98, + "learning_rate": 0.00012064660691421254, + "loss": 0.744, + "step": 2481 + }, + { + "epoch": 1.99, + "learning_rate": 0.00012061459667093471, + "loss": 0.8666, + "step": 2482 + }, + { + "epoch": 1.99, + "learning_rate": 0.00012058258642765685, + "loss": 0.7122, + "step": 2483 + }, + { + "epoch": 1.99, + "learning_rate": 0.00012055057618437902, + "loss": 0.7377, + "step": 2484 + }, + { + "epoch": 1.99, + "learning_rate": 0.00012051856594110115, + "loss": 0.8891, + "step": 2485 + }, + { + "epoch": 1.99, + "learning_rate": 0.0001204865556978233, + "loss": 0.7556, + "step": 2486 + }, + { + "epoch": 1.99, + "learning_rate": 0.00012045454545454546, + "loss": 0.885, + "step": 2487 + }, + { + "epoch": 1.99, + "learning_rate": 0.0001204225352112676, + "loss": 0.6879, + "step": 2488 + }, + { + "epoch": 1.99, + "learning_rate": 0.00012039052496798976, + "loss": 0.8877, + "step": 2489 + }, + { + "epoch": 1.99, + "learning_rate": 0.00012035851472471191, + "loss": 0.7082, + "step": 2490 + }, + { + "epoch": 1.99, + "learning_rate": 0.00012032650448143406, + "loss": 0.7311, + "step": 2491 + }, + { + "epoch": 1.99, + "learning_rate": 0.00012029449423815622, + "loss": 0.7437, + "step": 2492 + }, + { + "epoch": 1.99, + "learning_rate": 0.00012026248399487836, + "loss": 0.8843, + "step": 2493 + }, + { + "epoch": 2.0, + "learning_rate": 0.00012023047375160052, + "loss": 0.8721, + "step": 2494 + }, + { + "epoch": 2.0, + "learning_rate": 0.00012019846350832267, + "loss": 0.8123, + "step": 2495 + }, + { + "epoch": 2.0, + "learning_rate": 0.00012016645326504482, + "loss": 0.7354, + "step": 2496 + }, + { + "epoch": 2.0, + "learning_rate": 0.00012013444302176698, + "loss": 0.8254, + "step": 2497 + }, + { + "epoch": 2.0, + "learning_rate": 0.00012010243277848912, + "loss": 0.7907, + "step": 2498 + }, + { + "epoch": 2.0, + "learning_rate": 0.00012007042253521128, + "loss": 0.8544, + "step": 2499 + }, + { + "epoch": 2.0, + "learning_rate": 0.00012003841229193343, + "loss": 0.805, + "step": 2500 + }, + { + "epoch": 2.0, + "learning_rate": 0.00012000640204865559, + "loss": 0.7642, + "step": 2501 + }, + { + "epoch": 2.0, + "learning_rate": 0.00011997439180537773, + "loss": 0.7418, + "step": 2502 + }, + { + "epoch": 2.0, + "learning_rate": 0.00011994238156209987, + "loss": 0.8139, + "step": 2503 + }, + { + "epoch": 2.0, + "learning_rate": 0.00011991037131882204, + "loss": 0.7301, + "step": 2504 + }, + { + "epoch": 2.0, + "learning_rate": 0.00011987836107554417, + "loss": 0.8319, + "step": 2505 + }, + { + "epoch": 2.0, + "learning_rate": 0.00011984635083226635, + "loss": 0.8436, + "step": 2506 + }, + { + "epoch": 2.01, + "learning_rate": 0.00011981434058898848, + "loss": 0.7254, + "step": 2507 + }, + { + "epoch": 2.01, + "learning_rate": 0.00011978233034571062, + "loss": 0.7714, + "step": 2508 + }, + { + "epoch": 2.01, + "learning_rate": 0.00011975032010243278, + "loss": 0.7144, + "step": 2509 + }, + { + "epoch": 2.01, + "learning_rate": 0.00011971830985915493, + "loss": 0.7382, + "step": 2510 + }, + { + "epoch": 2.01, + "learning_rate": 0.00011968629961587709, + "loss": 0.7934, + "step": 2511 + }, + { + "epoch": 2.01, + "learning_rate": 0.00011965428937259924, + "loss": 0.7392, + "step": 2512 + }, + { + "epoch": 2.01, + "learning_rate": 0.00011962227912932138, + "loss": 0.6926, + "step": 2513 + }, + { + "epoch": 2.01, + "learning_rate": 0.00011959026888604354, + "loss": 0.8156, + "step": 2514 + }, + { + "epoch": 2.01, + "learning_rate": 0.00011955825864276569, + "loss": 0.8945, + "step": 2515 + }, + { + "epoch": 2.01, + "learning_rate": 0.00011952624839948785, + "loss": 0.6613, + "step": 2516 + }, + { + "epoch": 2.01, + "learning_rate": 0.00011949423815621, + "loss": 0.7856, + "step": 2517 + }, + { + "epoch": 2.01, + "learning_rate": 0.00011946222791293213, + "loss": 0.7174, + "step": 2518 + }, + { + "epoch": 2.02, + "learning_rate": 0.0001194302176696543, + "loss": 0.7856, + "step": 2519 + }, + { + "epoch": 2.02, + "learning_rate": 0.00011939820742637645, + "loss": 0.7686, + "step": 2520 + }, + { + "epoch": 2.02, + "learning_rate": 0.00011936619718309861, + "loss": 0.7643, + "step": 2521 + }, + { + "epoch": 2.02, + "learning_rate": 0.00011933418693982075, + "loss": 0.6553, + "step": 2522 + }, + { + "epoch": 2.02, + "learning_rate": 0.00011930217669654289, + "loss": 0.6704, + "step": 2523 + }, + { + "epoch": 2.02, + "learning_rate": 0.00011927016645326506, + "loss": 0.6359, + "step": 2524 + }, + { + "epoch": 2.02, + "learning_rate": 0.00011923815620998719, + "loss": 0.67, + "step": 2525 + }, + { + "epoch": 2.02, + "learning_rate": 0.00011920614596670937, + "loss": 0.8417, + "step": 2526 + }, + { + "epoch": 2.02, + "learning_rate": 0.0001191741357234315, + "loss": 0.7306, + "step": 2527 + }, + { + "epoch": 2.02, + "learning_rate": 0.00011914212548015367, + "loss": 0.7274, + "step": 2528 + }, + { + "epoch": 2.02, + "learning_rate": 0.0001191101152368758, + "loss": 0.6903, + "step": 2529 + }, + { + "epoch": 2.02, + "learning_rate": 0.00011907810499359795, + "loss": 0.7844, + "step": 2530 + }, + { + "epoch": 2.02, + "learning_rate": 0.00011904609475032011, + "loss": 0.7448, + "step": 2531 + }, + { + "epoch": 2.03, + "learning_rate": 0.00011901408450704226, + "loss": 0.7006, + "step": 2532 + }, + { + "epoch": 2.03, + "learning_rate": 0.00011898207426376442, + "loss": 0.6626, + "step": 2533 + }, + { + "epoch": 2.03, + "learning_rate": 0.00011895006402048656, + "loss": 0.6921, + "step": 2534 + }, + { + "epoch": 2.03, + "learning_rate": 0.00011891805377720871, + "loss": 0.7757, + "step": 2535 + }, + { + "epoch": 2.03, + "learning_rate": 0.00011888604353393087, + "loss": 0.7851, + "step": 2536 + }, + { + "epoch": 2.03, + "learning_rate": 0.00011885403329065301, + "loss": 0.6929, + "step": 2537 + }, + { + "epoch": 2.03, + "learning_rate": 0.00011882202304737517, + "loss": 0.7555, + "step": 2538 + }, + { + "epoch": 2.03, + "learning_rate": 0.00011879001280409732, + "loss": 0.6841, + "step": 2539 + }, + { + "epoch": 2.03, + "learning_rate": 0.00011875800256081945, + "loss": 0.7737, + "step": 2540 + }, + { + "epoch": 2.03, + "learning_rate": 0.00011872599231754163, + "loss": 0.7018, + "step": 2541 + }, + { + "epoch": 2.03, + "learning_rate": 0.00011869398207426376, + "loss": 0.682, + "step": 2542 + }, + { + "epoch": 2.03, + "learning_rate": 0.00011866197183098593, + "loss": 0.725, + "step": 2543 + }, + { + "epoch": 2.04, + "learning_rate": 0.00011862996158770807, + "loss": 0.6858, + "step": 2544 + }, + { + "epoch": 2.04, + "learning_rate": 0.00011859795134443021, + "loss": 0.755, + "step": 2545 + }, + { + "epoch": 2.04, + "learning_rate": 0.00011856594110115238, + "loss": 0.7388, + "step": 2546 + }, + { + "epoch": 2.04, + "learning_rate": 0.00011853393085787452, + "loss": 0.8237, + "step": 2547 + }, + { + "epoch": 2.04, + "learning_rate": 0.00011850192061459669, + "loss": 0.7838, + "step": 2548 + }, + { + "epoch": 2.04, + "learning_rate": 0.00011846991037131882, + "loss": 0.6704, + "step": 2549 + }, + { + "epoch": 2.04, + "learning_rate": 0.00011843790012804097, + "loss": 0.6804, + "step": 2550 + }, + { + "epoch": 2.04, + "learning_rate": 0.00011840588988476313, + "loss": 0.7985, + "step": 2551 + }, + { + "epoch": 2.04, + "learning_rate": 0.00011837387964148528, + "loss": 0.6407, + "step": 2552 + }, + { + "epoch": 2.04, + "learning_rate": 0.00011834186939820744, + "loss": 0.7917, + "step": 2553 + }, + { + "epoch": 2.04, + "learning_rate": 0.00011830985915492958, + "loss": 0.6359, + "step": 2554 + }, + { + "epoch": 2.04, + "learning_rate": 0.00011827784891165174, + "loss": 0.6689, + "step": 2555 + }, + { + "epoch": 2.04, + "learning_rate": 0.00011824583866837389, + "loss": 0.6379, + "step": 2556 + }, + { + "epoch": 2.05, + "learning_rate": 0.00011821382842509603, + "loss": 0.6704, + "step": 2557 + }, + { + "epoch": 2.05, + "learning_rate": 0.0001181818181818182, + "loss": 0.776, + "step": 2558 + }, + { + "epoch": 2.05, + "learning_rate": 0.00011814980793854034, + "loss": 0.7535, + "step": 2559 + }, + { + "epoch": 2.05, + "learning_rate": 0.0001181177976952625, + "loss": 0.7159, + "step": 2560 + }, + { + "epoch": 2.05, + "learning_rate": 0.00011808578745198465, + "loss": 0.7401, + "step": 2561 + }, + { + "epoch": 2.05, + "learning_rate": 0.00011805377720870678, + "loss": 0.8352, + "step": 2562 + }, + { + "epoch": 2.05, + "learning_rate": 0.00011802176696542895, + "loss": 0.8276, + "step": 2563 + }, + { + "epoch": 2.05, + "learning_rate": 0.00011798975672215108, + "loss": 0.872, + "step": 2564 + }, + { + "epoch": 2.05, + "learning_rate": 0.00011795774647887326, + "loss": 0.6557, + "step": 2565 + }, + { + "epoch": 2.05, + "learning_rate": 0.00011792573623559539, + "loss": 0.698, + "step": 2566 + }, + { + "epoch": 2.05, + "learning_rate": 0.00011789372599231754, + "loss": 0.7534, + "step": 2567 + }, + { + "epoch": 2.05, + "learning_rate": 0.0001178617157490397, + "loss": 0.8154, + "step": 2568 + }, + { + "epoch": 2.06, + "learning_rate": 0.00011782970550576184, + "loss": 0.7103, + "step": 2569 + }, + { + "epoch": 2.06, + "learning_rate": 0.000117797695262484, + "loss": 0.7232, + "step": 2570 + }, + { + "epoch": 2.06, + "learning_rate": 0.00011776568501920615, + "loss": 0.7581, + "step": 2571 + }, + { + "epoch": 2.06, + "learning_rate": 0.0001177336747759283, + "loss": 0.8078, + "step": 2572 + }, + { + "epoch": 2.06, + "learning_rate": 0.00011770166453265045, + "loss": 0.7282, + "step": 2573 + }, + { + "epoch": 2.06, + "learning_rate": 0.0001176696542893726, + "loss": 0.7558, + "step": 2574 + }, + { + "epoch": 2.06, + "learning_rate": 0.00011763764404609476, + "loss": 0.7168, + "step": 2575 + }, + { + "epoch": 2.06, + "learning_rate": 0.00011760563380281691, + "loss": 0.6692, + "step": 2576 + }, + { + "epoch": 2.06, + "learning_rate": 0.00011757362355953905, + "loss": 0.8424, + "step": 2577 + }, + { + "epoch": 2.06, + "learning_rate": 0.00011754161331626121, + "loss": 0.8837, + "step": 2578 + }, + { + "epoch": 2.06, + "learning_rate": 0.00011750960307298336, + "loss": 0.9305, + "step": 2579 + }, + { + "epoch": 2.06, + "learning_rate": 0.00011747759282970552, + "loss": 0.7255, + "step": 2580 + }, + { + "epoch": 2.06, + "learning_rate": 0.00011744558258642767, + "loss": 0.7444, + "step": 2581 + }, + { + "epoch": 2.07, + "learning_rate": 0.0001174135723431498, + "loss": 0.7215, + "step": 2582 + }, + { + "epoch": 2.07, + "learning_rate": 0.00011738156209987197, + "loss": 0.6114, + "step": 2583 + }, + { + "epoch": 2.07, + "learning_rate": 0.0001173495518565941, + "loss": 0.8114, + "step": 2584 + }, + { + "epoch": 2.07, + "learning_rate": 0.00011731754161331628, + "loss": 0.7906, + "step": 2585 + }, + { + "epoch": 2.07, + "learning_rate": 0.00011728553137003841, + "loss": 0.7395, + "step": 2586 + }, + { + "epoch": 2.07, + "learning_rate": 0.00011725352112676058, + "loss": 0.912, + "step": 2587 + }, + { + "epoch": 2.07, + "learning_rate": 0.00011722151088348272, + "loss": 0.6466, + "step": 2588 + }, + { + "epoch": 2.07, + "learning_rate": 0.00011718950064020486, + "loss": 0.92, + "step": 2589 + }, + { + "epoch": 2.07, + "learning_rate": 0.00011715749039692702, + "loss": 0.6754, + "step": 2590 + }, + { + "epoch": 2.07, + "learning_rate": 0.00011712548015364917, + "loss": 0.7114, + "step": 2591 + }, + { + "epoch": 2.07, + "learning_rate": 0.00011709346991037133, + "loss": 0.7, + "step": 2592 + }, + { + "epoch": 2.07, + "learning_rate": 0.00011706145966709347, + "loss": 0.7569, + "step": 2593 + }, + { + "epoch": 2.08, + "learning_rate": 0.00011702944942381562, + "loss": 0.841, + "step": 2594 + }, + { + "epoch": 2.08, + "learning_rate": 0.00011699743918053778, + "loss": 0.7138, + "step": 2595 + }, + { + "epoch": 2.08, + "learning_rate": 0.00011696542893725993, + "loss": 0.828, + "step": 2596 + }, + { + "epoch": 2.08, + "learning_rate": 0.00011693341869398209, + "loss": 0.6626, + "step": 2597 + }, + { + "epoch": 2.08, + "learning_rate": 0.00011690140845070423, + "loss": 0.8019, + "step": 2598 + }, + { + "epoch": 2.08, + "learning_rate": 0.00011686939820742638, + "loss": 0.6483, + "step": 2599 + }, + { + "epoch": 2.08, + "learning_rate": 0.00011683738796414854, + "loss": 0.7735, + "step": 2600 + }, + { + "epoch": 2.08, + "learning_rate": 0.00011680537772087068, + "loss": 0.6717, + "step": 2601 + }, + { + "epoch": 2.08, + "learning_rate": 0.00011677336747759284, + "loss": 0.7058, + "step": 2602 + }, + { + "epoch": 2.08, + "learning_rate": 0.00011674135723431499, + "loss": 0.7001, + "step": 2603 + }, + { + "epoch": 2.08, + "learning_rate": 0.00011670934699103712, + "loss": 0.783, + "step": 2604 + }, + { + "epoch": 2.08, + "learning_rate": 0.0001166773367477593, + "loss": 0.6655, + "step": 2605 + }, + { + "epoch": 2.08, + "learning_rate": 0.00011664532650448143, + "loss": 0.8735, + "step": 2606 + }, + { + "epoch": 2.09, + "learning_rate": 0.0001166133162612036, + "loss": 0.749, + "step": 2607 + }, + { + "epoch": 2.09, + "learning_rate": 0.00011658130601792574, + "loss": 0.7387, + "step": 2608 + }, + { + "epoch": 2.09, + "learning_rate": 0.00011654929577464788, + "loss": 0.6799, + "step": 2609 + }, + { + "epoch": 2.09, + "learning_rate": 0.00011651728553137004, + "loss": 0.7341, + "step": 2610 + }, + { + "epoch": 2.09, + "learning_rate": 0.00011648527528809219, + "loss": 0.7244, + "step": 2611 + }, + { + "epoch": 2.09, + "learning_rate": 0.00011645326504481435, + "loss": 0.8251, + "step": 2612 + }, + { + "epoch": 2.09, + "learning_rate": 0.0001164212548015365, + "loss": 0.7844, + "step": 2613 + }, + { + "epoch": 2.09, + "learning_rate": 0.00011638924455825865, + "loss": 0.7674, + "step": 2614 + }, + { + "epoch": 2.09, + "learning_rate": 0.0001163572343149808, + "loss": 0.7316, + "step": 2615 + }, + { + "epoch": 2.09, + "learning_rate": 0.00011632522407170295, + "loss": 0.7028, + "step": 2616 + }, + { + "epoch": 2.09, + "learning_rate": 0.0001162932138284251, + "loss": 0.758, + "step": 2617 + }, + { + "epoch": 2.09, + "learning_rate": 0.00011626120358514725, + "loss": 0.8201, + "step": 2618 + }, + { + "epoch": 2.1, + "learning_rate": 0.00011622919334186941, + "loss": 0.8022, + "step": 2619 + }, + { + "epoch": 2.1, + "learning_rate": 0.00011619718309859156, + "loss": 0.7581, + "step": 2620 + }, + { + "epoch": 2.1, + "learning_rate": 0.00011616517285531369, + "loss": 0.648, + "step": 2621 + }, + { + "epoch": 2.1, + "learning_rate": 0.00011613316261203586, + "loss": 0.5945, + "step": 2622 + }, + { + "epoch": 2.1, + "learning_rate": 0.00011610115236875801, + "loss": 0.6554, + "step": 2623 + }, + { + "epoch": 2.1, + "learning_rate": 0.00011606914212548017, + "loss": 0.7788, + "step": 2624 + }, + { + "epoch": 2.1, + "learning_rate": 0.00011603713188220232, + "loss": 0.7396, + "step": 2625 + }, + { + "epoch": 2.1, + "learning_rate": 0.00011600512163892445, + "loss": 0.7621, + "step": 2626 + }, + { + "epoch": 2.1, + "learning_rate": 0.00011597311139564662, + "loss": 0.7778, + "step": 2627 + }, + { + "epoch": 2.1, + "learning_rate": 0.00011594110115236876, + "loss": 0.7829, + "step": 2628 + }, + { + "epoch": 2.1, + "learning_rate": 0.00011590909090909093, + "loss": 0.8175, + "step": 2629 + }, + { + "epoch": 2.1, + "learning_rate": 0.00011587708066581306, + "loss": 0.7608, + "step": 2630 + }, + { + "epoch": 2.1, + "learning_rate": 0.00011584507042253521, + "loss": 0.7722, + "step": 2631 + }, + { + "epoch": 2.11, + "learning_rate": 0.00011581306017925737, + "loss": 0.7389, + "step": 2632 + }, + { + "epoch": 2.11, + "learning_rate": 0.00011578104993597951, + "loss": 0.6949, + "step": 2633 + }, + { + "epoch": 2.11, + "learning_rate": 0.00011574903969270167, + "loss": 0.7604, + "step": 2634 + }, + { + "epoch": 2.11, + "learning_rate": 0.00011571702944942382, + "loss": 0.7373, + "step": 2635 + }, + { + "epoch": 2.11, + "learning_rate": 0.00011568501920614597, + "loss": 0.7898, + "step": 2636 + }, + { + "epoch": 2.11, + "learning_rate": 0.00011565300896286813, + "loss": 0.7864, + "step": 2637 + }, + { + "epoch": 2.11, + "learning_rate": 0.00011562099871959027, + "loss": 0.6213, + "step": 2638 + }, + { + "epoch": 2.11, + "learning_rate": 0.00011558898847631243, + "loss": 0.7132, + "step": 2639 + }, + { + "epoch": 2.11, + "learning_rate": 0.00011555697823303458, + "loss": 0.7971, + "step": 2640 + }, + { + "epoch": 2.11, + "learning_rate": 0.00011552496798975674, + "loss": 0.694, + "step": 2641 + }, + { + "epoch": 2.11, + "learning_rate": 0.00011549295774647888, + "loss": 0.7641, + "step": 2642 + }, + { + "epoch": 2.11, + "learning_rate": 0.00011546094750320102, + "loss": 0.8138, + "step": 2643 + }, + { + "epoch": 2.12, + "learning_rate": 0.00011542893725992319, + "loss": 0.6394, + "step": 2644 + }, + { + "epoch": 2.12, + "learning_rate": 0.00011539692701664532, + "loss": 0.7759, + "step": 2645 + }, + { + "epoch": 2.12, + "learning_rate": 0.0001153649167733675, + "loss": 0.7264, + "step": 2646 + }, + { + "epoch": 2.12, + "learning_rate": 0.00011533290653008963, + "loss": 0.7146, + "step": 2647 + }, + { + "epoch": 2.12, + "learning_rate": 0.00011530089628681177, + "loss": 0.8451, + "step": 2648 + }, + { + "epoch": 2.12, + "learning_rate": 0.00011526888604353395, + "loss": 0.8832, + "step": 2649 + }, + { + "epoch": 2.12, + "learning_rate": 0.00011523687580025608, + "loss": 0.7071, + "step": 2650 + }, + { + "epoch": 2.12, + "learning_rate": 0.00011520486555697825, + "loss": 0.8829, + "step": 2651 + }, + { + "epoch": 2.12, + "learning_rate": 0.00011517285531370039, + "loss": 0.7301, + "step": 2652 + }, + { + "epoch": 2.12, + "learning_rate": 0.00011514084507042253, + "loss": 0.7227, + "step": 2653 + }, + { + "epoch": 2.12, + "learning_rate": 0.00011510883482714469, + "loss": 0.7201, + "step": 2654 + }, + { + "epoch": 2.12, + "learning_rate": 0.00011507682458386684, + "loss": 0.7937, + "step": 2655 + }, + { + "epoch": 2.12, + "learning_rate": 0.000115044814340589, + "loss": 0.8179, + "step": 2656 + }, + { + "epoch": 2.13, + "learning_rate": 0.00011501280409731114, + "loss": 0.77, + "step": 2657 + }, + { + "epoch": 2.13, + "learning_rate": 0.00011498079385403329, + "loss": 0.6928, + "step": 2658 + }, + { + "epoch": 2.13, + "learning_rate": 0.00011494878361075545, + "loss": 0.6454, + "step": 2659 + }, + { + "epoch": 2.13, + "learning_rate": 0.0001149167733674776, + "loss": 0.7725, + "step": 2660 + }, + { + "epoch": 2.13, + "learning_rate": 0.00011488476312419976, + "loss": 0.9504, + "step": 2661 + }, + { + "epoch": 2.13, + "learning_rate": 0.0001148527528809219, + "loss": 0.719, + "step": 2662 + }, + { + "epoch": 2.13, + "learning_rate": 0.00011482074263764404, + "loss": 0.7262, + "step": 2663 + }, + { + "epoch": 2.13, + "learning_rate": 0.00011478873239436621, + "loss": 0.7349, + "step": 2664 + }, + { + "epoch": 2.13, + "learning_rate": 0.00011475672215108834, + "loss": 0.63, + "step": 2665 + }, + { + "epoch": 2.13, + "learning_rate": 0.00011472471190781052, + "loss": 0.7517, + "step": 2666 + }, + { + "epoch": 2.13, + "learning_rate": 0.00011469270166453265, + "loss": 0.6737, + "step": 2667 + }, + { + "epoch": 2.13, + "learning_rate": 0.00011466069142125482, + "loss": 0.806, + "step": 2668 + }, + { + "epoch": 2.14, + "learning_rate": 0.00011462868117797695, + "loss": 0.7152, + "step": 2669 + }, + { + "epoch": 2.14, + "learning_rate": 0.0001145966709346991, + "loss": 0.6558, + "step": 2670 + }, + { + "epoch": 2.14, + "learning_rate": 0.00011456466069142126, + "loss": 0.8174, + "step": 2671 + }, + { + "epoch": 2.14, + "learning_rate": 0.0001145326504481434, + "loss": 0.6523, + "step": 2672 + }, + { + "epoch": 2.14, + "learning_rate": 0.00011450064020486557, + "loss": 0.8625, + "step": 2673 + }, + { + "epoch": 2.14, + "learning_rate": 0.00011446862996158771, + "loss": 0.7599, + "step": 2674 + }, + { + "epoch": 2.14, + "learning_rate": 0.00011443661971830986, + "loss": 0.8176, + "step": 2675 + }, + { + "epoch": 2.14, + "learning_rate": 0.00011440460947503202, + "loss": 0.807, + "step": 2676 + }, + { + "epoch": 2.14, + "learning_rate": 0.00011437259923175416, + "loss": 0.7015, + "step": 2677 + }, + { + "epoch": 2.14, + "learning_rate": 0.00011434058898847632, + "loss": 0.7423, + "step": 2678 + }, + { + "epoch": 2.14, + "learning_rate": 0.00011430857874519847, + "loss": 0.7265, + "step": 2679 + }, + { + "epoch": 2.14, + "learning_rate": 0.00011427656850192062, + "loss": 0.7397, + "step": 2680 + }, + { + "epoch": 2.14, + "learning_rate": 0.00011424455825864278, + "loss": 0.6523, + "step": 2681 + }, + { + "epoch": 2.15, + "learning_rate": 0.00011421254801536492, + "loss": 0.8245, + "step": 2682 + }, + { + "epoch": 2.15, + "learning_rate": 0.00011418053777208708, + "loss": 0.8496, + "step": 2683 + }, + { + "epoch": 2.15, + "learning_rate": 0.00011414852752880923, + "loss": 0.6092, + "step": 2684 + }, + { + "epoch": 2.15, + "learning_rate": 0.00011411651728553136, + "loss": 0.7027, + "step": 2685 + }, + { + "epoch": 2.15, + "learning_rate": 0.00011408450704225353, + "loss": 0.6607, + "step": 2686 + }, + { + "epoch": 2.15, + "learning_rate": 0.00011405249679897567, + "loss": 0.7962, + "step": 2687 + }, + { + "epoch": 2.15, + "learning_rate": 0.00011402048655569784, + "loss": 0.7765, + "step": 2688 + }, + { + "epoch": 2.15, + "learning_rate": 0.00011398847631241997, + "loss": 0.7252, + "step": 2689 + }, + { + "epoch": 2.15, + "learning_rate": 0.00011395646606914212, + "loss": 0.7404, + "step": 2690 + }, + { + "epoch": 2.15, + "learning_rate": 0.00011392445582586428, + "loss": 0.7645, + "step": 2691 + }, + { + "epoch": 2.15, + "learning_rate": 0.00011389244558258643, + "loss": 0.8634, + "step": 2692 + }, + { + "epoch": 2.15, + "learning_rate": 0.00011386043533930859, + "loss": 0.7868, + "step": 2693 + }, + { + "epoch": 2.16, + "learning_rate": 0.00011382842509603073, + "loss": 0.8595, + "step": 2694 + }, + { + "epoch": 2.16, + "learning_rate": 0.00011379641485275289, + "loss": 0.724, + "step": 2695 + }, + { + "epoch": 2.16, + "learning_rate": 0.00011376440460947504, + "loss": 0.7439, + "step": 2696 + }, + { + "epoch": 2.16, + "learning_rate": 0.00011373239436619718, + "loss": 0.766, + "step": 2697 + }, + { + "epoch": 2.16, + "learning_rate": 0.00011370038412291934, + "loss": 0.7569, + "step": 2698 + }, + { + "epoch": 2.16, + "learning_rate": 0.00011366837387964149, + "loss": 0.8244, + "step": 2699 + }, + { + "epoch": 2.16, + "learning_rate": 0.00011363636363636365, + "loss": 0.7504, + "step": 2700 + }, + { + "epoch": 2.16, + "learning_rate": 0.0001136043533930858, + "loss": 0.7657, + "step": 2701 + }, + { + "epoch": 2.16, + "learning_rate": 0.00011357234314980794, + "loss": 0.7275, + "step": 2702 + }, + { + "epoch": 2.16, + "learning_rate": 0.0001135403329065301, + "loss": 0.6684, + "step": 2703 + }, + { + "epoch": 2.16, + "learning_rate": 0.00011350832266325225, + "loss": 0.7726, + "step": 2704 + }, + { + "epoch": 2.16, + "learning_rate": 0.00011347631241997441, + "loss": 0.7801, + "step": 2705 + }, + { + "epoch": 2.16, + "learning_rate": 0.00011344430217669655, + "loss": 0.6749, + "step": 2706 + }, + { + "epoch": 2.17, + "learning_rate": 0.00011341229193341869, + "loss": 0.7266, + "step": 2707 + }, + { + "epoch": 2.17, + "learning_rate": 0.00011338028169014086, + "loss": 0.8835, + "step": 2708 + }, + { + "epoch": 2.17, + "learning_rate": 0.00011334827144686299, + "loss": 0.6929, + "step": 2709 + }, + { + "epoch": 2.17, + "learning_rate": 0.00011331626120358517, + "loss": 0.7982, + "step": 2710 + }, + { + "epoch": 2.17, + "learning_rate": 0.0001132842509603073, + "loss": 0.7641, + "step": 2711 + }, + { + "epoch": 2.17, + "learning_rate": 0.00011325224071702945, + "loss": 0.7761, + "step": 2712 + }, + { + "epoch": 2.17, + "learning_rate": 0.0001132202304737516, + "loss": 0.7583, + "step": 2713 + }, + { + "epoch": 2.17, + "learning_rate": 0.00011318822023047375, + "loss": 0.6922, + "step": 2714 + }, + { + "epoch": 2.17, + "learning_rate": 0.00011315620998719591, + "loss": 0.6959, + "step": 2715 + }, + { + "epoch": 2.17, + "learning_rate": 0.00011312419974391806, + "loss": 0.7338, + "step": 2716 + }, + { + "epoch": 2.17, + "learning_rate": 0.0001130921895006402, + "loss": 0.8185, + "step": 2717 + }, + { + "epoch": 2.17, + "learning_rate": 0.00011306017925736236, + "loss": 0.7502, + "step": 2718 + }, + { + "epoch": 2.18, + "learning_rate": 0.00011302816901408451, + "loss": 0.5995, + "step": 2719 + }, + { + "epoch": 2.18, + "learning_rate": 0.00011299615877080667, + "loss": 0.6897, + "step": 2720 + }, + { + "epoch": 2.18, + "learning_rate": 0.00011296414852752882, + "loss": 0.7661, + "step": 2721 + }, + { + "epoch": 2.18, + "learning_rate": 0.00011293213828425098, + "loss": 0.7784, + "step": 2722 + }, + { + "epoch": 2.18, + "learning_rate": 0.00011290012804097312, + "loss": 0.7971, + "step": 2723 + }, + { + "epoch": 2.18, + "learning_rate": 0.00011286811779769525, + "loss": 0.8032, + "step": 2724 + }, + { + "epoch": 2.18, + "learning_rate": 0.00011283610755441743, + "loss": 0.8649, + "step": 2725 + }, + { + "epoch": 2.18, + "learning_rate": 0.00011280409731113957, + "loss": 0.6978, + "step": 2726 + }, + { + "epoch": 2.18, + "learning_rate": 0.00011277208706786173, + "loss": 0.7986, + "step": 2727 + }, + { + "epoch": 2.18, + "learning_rate": 0.00011274007682458388, + "loss": 0.6867, + "step": 2728 + }, + { + "epoch": 2.18, + "learning_rate": 0.00011270806658130601, + "loss": 0.7625, + "step": 2729 + }, + { + "epoch": 2.18, + "learning_rate": 0.00011267605633802819, + "loss": 0.8267, + "step": 2730 + }, + { + "epoch": 2.18, + "learning_rate": 0.00011264404609475032, + "loss": 0.7203, + "step": 2731 + }, + { + "epoch": 2.19, + "learning_rate": 0.00011261203585147249, + "loss": 0.689, + "step": 2732 + }, + { + "epoch": 2.19, + "learning_rate": 0.00011258002560819462, + "loss": 0.7133, + "step": 2733 + }, + { + "epoch": 2.19, + "learning_rate": 0.00011254801536491677, + "loss": 0.6844, + "step": 2734 + }, + { + "epoch": 2.19, + "learning_rate": 0.00011251600512163893, + "loss": 0.8168, + "step": 2735 + }, + { + "epoch": 2.19, + "learning_rate": 0.00011248399487836108, + "loss": 0.6101, + "step": 2736 + }, + { + "epoch": 2.19, + "learning_rate": 0.00011245198463508324, + "loss": 0.8262, + "step": 2737 + }, + { + "epoch": 2.19, + "learning_rate": 0.00011241997439180538, + "loss": 0.8676, + "step": 2738 + }, + { + "epoch": 2.19, + "learning_rate": 0.00011238796414852753, + "loss": 0.792, + "step": 2739 + }, + { + "epoch": 2.19, + "learning_rate": 0.00011235595390524969, + "loss": 0.6827, + "step": 2740 + }, + { + "epoch": 2.19, + "learning_rate": 0.00011232394366197184, + "loss": 0.79, + "step": 2741 + }, + { + "epoch": 2.19, + "learning_rate": 0.000112291933418694, + "loss": 0.8492, + "step": 2742 + }, + { + "epoch": 2.19, + "learning_rate": 0.00011225992317541614, + "loss": 0.6781, + "step": 2743 + }, + { + "epoch": 2.2, + "learning_rate": 0.00011222791293213827, + "loss": 0.7206, + "step": 2744 + }, + { + "epoch": 2.2, + "learning_rate": 0.00011219590268886045, + "loss": 0.7191, + "step": 2745 + }, + { + "epoch": 2.2, + "learning_rate": 0.00011216389244558258, + "loss": 0.7237, + "step": 2746 + }, + { + "epoch": 2.2, + "learning_rate": 0.00011213188220230475, + "loss": 0.6823, + "step": 2747 + }, + { + "epoch": 2.2, + "learning_rate": 0.00011209987195902689, + "loss": 0.8338, + "step": 2748 + }, + { + "epoch": 2.2, + "learning_rate": 0.00011206786171574903, + "loss": 0.6784, + "step": 2749 + }, + { + "epoch": 2.2, + "learning_rate": 0.00011203585147247119, + "loss": 0.7582, + "step": 2750 + }, + { + "epoch": 2.2, + "learning_rate": 0.00011200384122919334, + "loss": 0.7048, + "step": 2751 + }, + { + "epoch": 2.2, + "learning_rate": 0.0001119718309859155, + "loss": 0.7567, + "step": 2752 + }, + { + "epoch": 2.2, + "learning_rate": 0.00011193982074263764, + "loss": 0.8061, + "step": 2753 + }, + { + "epoch": 2.2, + "learning_rate": 0.00011190781049935982, + "loss": 0.8665, + "step": 2754 + }, + { + "epoch": 2.2, + "learning_rate": 0.00011187580025608195, + "loss": 0.7462, + "step": 2755 + }, + { + "epoch": 2.2, + "learning_rate": 0.0001118437900128041, + "loss": 0.6521, + "step": 2756 + }, + { + "epoch": 2.21, + "learning_rate": 0.00011181177976952626, + "loss": 0.7702, + "step": 2757 + }, + { + "epoch": 2.21, + "learning_rate": 0.0001117797695262484, + "loss": 0.6603, + "step": 2758 + }, + { + "epoch": 2.21, + "learning_rate": 0.00011174775928297056, + "loss": 0.655, + "step": 2759 + }, + { + "epoch": 2.21, + "learning_rate": 0.00011171574903969271, + "loss": 0.6808, + "step": 2760 + }, + { + "epoch": 2.21, + "learning_rate": 0.00011168373879641485, + "loss": 0.7912, + "step": 2761 + }, + { + "epoch": 2.21, + "learning_rate": 0.00011165172855313701, + "loss": 0.7383, + "step": 2762 + }, + { + "epoch": 2.21, + "learning_rate": 0.00011161971830985916, + "loss": 0.7879, + "step": 2763 + }, + { + "epoch": 2.21, + "learning_rate": 0.00011158770806658132, + "loss": 0.846, + "step": 2764 + }, + { + "epoch": 2.21, + "learning_rate": 0.00011155569782330347, + "loss": 0.754, + "step": 2765 + }, + { + "epoch": 2.21, + "learning_rate": 0.0001115236875800256, + "loss": 0.7713, + "step": 2766 + }, + { + "epoch": 2.21, + "learning_rate": 0.00011149167733674777, + "loss": 0.8566, + "step": 2767 + }, + { + "epoch": 2.21, + "learning_rate": 0.0001114596670934699, + "loss": 0.7089, + "step": 2768 + }, + { + "epoch": 2.22, + "learning_rate": 0.00011142765685019208, + "loss": 0.7885, + "step": 2769 + }, + { + "epoch": 2.22, + "learning_rate": 0.00011139564660691421, + "loss": 0.7838, + "step": 2770 + }, + { + "epoch": 2.22, + "learning_rate": 0.00011136363636363636, + "loss": 0.7092, + "step": 2771 + }, + { + "epoch": 2.22, + "learning_rate": 0.00011133162612035852, + "loss": 0.8319, + "step": 2772 + }, + { + "epoch": 2.22, + "learning_rate": 0.00011129961587708066, + "loss": 0.7356, + "step": 2773 + }, + { + "epoch": 2.22, + "learning_rate": 0.00011126760563380282, + "loss": 0.7034, + "step": 2774 + }, + { + "epoch": 2.22, + "learning_rate": 0.00011123559539052497, + "loss": 0.8437, + "step": 2775 + }, + { + "epoch": 2.22, + "learning_rate": 0.00011120358514724712, + "loss": 0.8333, + "step": 2776 + }, + { + "epoch": 2.22, + "learning_rate": 0.00011117157490396928, + "loss": 0.8018, + "step": 2777 + }, + { + "epoch": 2.22, + "learning_rate": 0.00011113956466069142, + "loss": 0.7066, + "step": 2778 + }, + { + "epoch": 2.22, + "learning_rate": 0.00011110755441741358, + "loss": 0.6987, + "step": 2779 + }, + { + "epoch": 2.22, + "learning_rate": 0.00011107554417413573, + "loss": 0.7384, + "step": 2780 + }, + { + "epoch": 2.22, + "learning_rate": 0.00011104353393085789, + "loss": 0.8401, + "step": 2781 + }, + { + "epoch": 2.23, + "learning_rate": 0.00011101152368758003, + "loss": 0.7865, + "step": 2782 + }, + { + "epoch": 2.23, + "learning_rate": 0.00011097951344430218, + "loss": 0.8528, + "step": 2783 + }, + { + "epoch": 2.23, + "learning_rate": 0.00011094750320102434, + "loss": 0.822, + "step": 2784 + }, + { + "epoch": 2.23, + "learning_rate": 0.00011091549295774649, + "loss": 0.7093, + "step": 2785 + }, + { + "epoch": 2.23, + "learning_rate": 0.00011088348271446865, + "loss": 0.7562, + "step": 2786 + }, + { + "epoch": 2.23, + "learning_rate": 0.00011085147247119079, + "loss": 0.7744, + "step": 2787 + }, + { + "epoch": 2.23, + "learning_rate": 0.00011081946222791292, + "loss": 0.7343, + "step": 2788 + }, + { + "epoch": 2.23, + "learning_rate": 0.0001107874519846351, + "loss": 0.7553, + "step": 2789 + }, + { + "epoch": 2.23, + "learning_rate": 0.00011075544174135723, + "loss": 0.6779, + "step": 2790 + }, + { + "epoch": 2.23, + "learning_rate": 0.0001107234314980794, + "loss": 0.7096, + "step": 2791 + }, + { + "epoch": 2.23, + "learning_rate": 0.00011069142125480154, + "loss": 0.8799, + "step": 2792 + }, + { + "epoch": 2.23, + "learning_rate": 0.00011065941101152368, + "loss": 0.6849, + "step": 2793 + }, + { + "epoch": 2.24, + "learning_rate": 0.00011062740076824584, + "loss": 0.6549, + "step": 2794 + }, + { + "epoch": 2.24, + "learning_rate": 0.00011059539052496799, + "loss": 0.7539, + "step": 2795 + }, + { + "epoch": 2.24, + "learning_rate": 0.00011056338028169015, + "loss": 0.7529, + "step": 2796 + }, + { + "epoch": 2.24, + "learning_rate": 0.0001105313700384123, + "loss": 0.8081, + "step": 2797 + }, + { + "epoch": 2.24, + "learning_rate": 0.00011049935979513444, + "loss": 0.6778, + "step": 2798 + }, + { + "epoch": 2.24, + "learning_rate": 0.0001104673495518566, + "loss": 0.8088, + "step": 2799 + }, + { + "epoch": 2.24, + "learning_rate": 0.00011043533930857875, + "loss": 0.7295, + "step": 2800 + }, + { + "epoch": 2.24, + "learning_rate": 0.00011040332906530091, + "loss": 0.726, + "step": 2801 + }, + { + "epoch": 2.24, + "learning_rate": 0.00011037131882202305, + "loss": 0.7224, + "step": 2802 + }, + { + "epoch": 2.24, + "learning_rate": 0.0001103393085787452, + "loss": 0.7661, + "step": 2803 + }, + { + "epoch": 2.24, + "learning_rate": 0.00011030729833546736, + "loss": 0.8402, + "step": 2804 + }, + { + "epoch": 2.24, + "learning_rate": 0.0001102752880921895, + "loss": 0.6793, + "step": 2805 + }, + { + "epoch": 2.24, + "learning_rate": 0.00011024327784891167, + "loss": 0.8112, + "step": 2806 + }, + { + "epoch": 2.25, + "learning_rate": 0.00011021126760563381, + "loss": 0.7924, + "step": 2807 + }, + { + "epoch": 2.25, + "learning_rate": 0.00011017925736235597, + "loss": 0.7062, + "step": 2808 + }, + { + "epoch": 2.25, + "learning_rate": 0.00011014724711907812, + "loss": 0.8203, + "step": 2809 + }, + { + "epoch": 2.25, + "learning_rate": 0.00011011523687580025, + "loss": 0.6892, + "step": 2810 + }, + { + "epoch": 2.25, + "learning_rate": 0.00011008322663252242, + "loss": 0.7386, + "step": 2811 + }, + { + "epoch": 2.25, + "learning_rate": 0.00011005121638924456, + "loss": 0.7778, + "step": 2812 + }, + { + "epoch": 2.25, + "learning_rate": 0.00011001920614596673, + "loss": 0.7696, + "step": 2813 + }, + { + "epoch": 2.25, + "learning_rate": 0.00010998719590268886, + "loss": 0.7173, + "step": 2814 + }, + { + "epoch": 2.25, + "learning_rate": 0.00010995518565941101, + "loss": 0.7153, + "step": 2815 + }, + { + "epoch": 2.25, + "learning_rate": 0.00010992317541613317, + "loss": 0.8022, + "step": 2816 + }, + { + "epoch": 2.25, + "learning_rate": 0.00010989116517285531, + "loss": 0.8339, + "step": 2817 + }, + { + "epoch": 2.25, + "learning_rate": 0.00010985915492957747, + "loss": 0.6598, + "step": 2818 + }, + { + "epoch": 2.26, + "learning_rate": 0.00010982714468629962, + "loss": 0.812, + "step": 2819 + }, + { + "epoch": 2.26, + "learning_rate": 0.00010979513444302177, + "loss": 0.7727, + "step": 2820 + }, + { + "epoch": 2.26, + "learning_rate": 0.00010976312419974393, + "loss": 0.7132, + "step": 2821 + }, + { + "epoch": 2.26, + "learning_rate": 0.00010973111395646607, + "loss": 0.6565, + "step": 2822 + }, + { + "epoch": 2.26, + "learning_rate": 0.00010969910371318823, + "loss": 0.7577, + "step": 2823 + }, + { + "epoch": 2.26, + "learning_rate": 0.00010966709346991038, + "loss": 0.7975, + "step": 2824 + }, + { + "epoch": 2.26, + "learning_rate": 0.00010963508322663251, + "loss": 0.9308, + "step": 2825 + }, + { + "epoch": 2.26, + "learning_rate": 0.00010960307298335468, + "loss": 0.7287, + "step": 2826 + }, + { + "epoch": 2.26, + "learning_rate": 0.00010957106274007682, + "loss": 0.635, + "step": 2827 + }, + { + "epoch": 2.26, + "learning_rate": 0.00010953905249679899, + "loss": 0.7248, + "step": 2828 + }, + { + "epoch": 2.26, + "learning_rate": 0.00010950704225352114, + "loss": 0.6549, + "step": 2829 + }, + { + "epoch": 2.26, + "learning_rate": 0.00010947503201024327, + "loss": 0.661, + "step": 2830 + }, + { + "epoch": 2.26, + "learning_rate": 0.00010944302176696544, + "loss": 0.822, + "step": 2831 + }, + { + "epoch": 2.27, + "learning_rate": 0.00010941101152368758, + "loss": 0.7901, + "step": 2832 + }, + { + "epoch": 2.27, + "learning_rate": 0.00010937900128040975, + "loss": 0.7717, + "step": 2833 + }, + { + "epoch": 2.27, + "learning_rate": 0.00010934699103713188, + "loss": 0.7677, + "step": 2834 + }, + { + "epoch": 2.27, + "learning_rate": 0.00010931498079385406, + "loss": 0.7423, + "step": 2835 + }, + { + "epoch": 2.27, + "learning_rate": 0.00010928297055057619, + "loss": 0.7465, + "step": 2836 + }, + { + "epoch": 2.27, + "learning_rate": 0.00010925096030729833, + "loss": 0.7759, + "step": 2837 + }, + { + "epoch": 2.27, + "learning_rate": 0.0001092189500640205, + "loss": 0.8715, + "step": 2838 + }, + { + "epoch": 2.27, + "learning_rate": 0.00010918693982074264, + "loss": 0.9301, + "step": 2839 + }, + { + "epoch": 2.27, + "learning_rate": 0.0001091549295774648, + "loss": 0.7621, + "step": 2840 + }, + { + "epoch": 2.27, + "learning_rate": 0.00010912291933418695, + "loss": 0.6702, + "step": 2841 + }, + { + "epoch": 2.27, + "learning_rate": 0.00010909090909090909, + "loss": 0.7293, + "step": 2842 + }, + { + "epoch": 2.27, + "learning_rate": 0.00010905889884763125, + "loss": 0.7899, + "step": 2843 + }, + { + "epoch": 2.28, + "learning_rate": 0.0001090268886043534, + "loss": 0.7037, + "step": 2844 + }, + { + "epoch": 2.28, + "learning_rate": 0.00010899487836107556, + "loss": 0.8262, + "step": 2845 + }, + { + "epoch": 2.28, + "learning_rate": 0.0001089628681177977, + "loss": 0.7469, + "step": 2846 + }, + { + "epoch": 2.28, + "learning_rate": 0.00010893085787451984, + "loss": 0.7508, + "step": 2847 + }, + { + "epoch": 2.28, + "learning_rate": 0.00010889884763124201, + "loss": 0.8034, + "step": 2848 + }, + { + "epoch": 2.28, + "learning_rate": 0.00010886683738796414, + "loss": 0.7848, + "step": 2849 + }, + { + "epoch": 2.28, + "learning_rate": 0.00010883482714468632, + "loss": 0.7512, + "step": 2850 + }, + { + "epoch": 2.28, + "learning_rate": 0.00010880281690140845, + "loss": 0.8466, + "step": 2851 + }, + { + "epoch": 2.28, + "learning_rate": 0.0001087708066581306, + "loss": 0.7281, + "step": 2852 + }, + { + "epoch": 2.28, + "learning_rate": 0.00010873879641485276, + "loss": 0.7975, + "step": 2853 + }, + { + "epoch": 2.28, + "learning_rate": 0.0001087067861715749, + "loss": 0.8252, + "step": 2854 + }, + { + "epoch": 2.28, + "learning_rate": 0.00010867477592829706, + "loss": 0.6473, + "step": 2855 + }, + { + "epoch": 2.28, + "learning_rate": 0.00010864276568501921, + "loss": 0.7509, + "step": 2856 + }, + { + "epoch": 2.29, + "learning_rate": 0.00010861075544174135, + "loss": 0.7057, + "step": 2857 + }, + { + "epoch": 2.29, + "learning_rate": 0.00010857874519846351, + "loss": 0.6553, + "step": 2858 + }, + { + "epoch": 2.29, + "learning_rate": 0.00010854673495518566, + "loss": 0.7575, + "step": 2859 + }, + { + "epoch": 2.29, + "learning_rate": 0.00010851472471190782, + "loss": 0.6393, + "step": 2860 + }, + { + "epoch": 2.29, + "learning_rate": 0.00010848271446862997, + "loss": 0.6733, + "step": 2861 + }, + { + "epoch": 2.29, + "learning_rate": 0.00010845070422535213, + "loss": 0.6764, + "step": 2862 + }, + { + "epoch": 2.29, + "learning_rate": 0.00010841869398207427, + "loss": 0.7881, + "step": 2863 + }, + { + "epoch": 2.29, + "learning_rate": 0.00010838668373879642, + "loss": 0.6654, + "step": 2864 + }, + { + "epoch": 2.29, + "learning_rate": 0.00010835467349551858, + "loss": 0.7564, + "step": 2865 + }, + { + "epoch": 2.29, + "learning_rate": 0.00010832266325224072, + "loss": 0.8032, + "step": 2866 + }, + { + "epoch": 2.29, + "learning_rate": 0.00010829065300896288, + "loss": 0.7194, + "step": 2867 + }, + { + "epoch": 2.29, + "learning_rate": 0.00010825864276568503, + "loss": 0.8221, + "step": 2868 + }, + { + "epoch": 2.3, + "learning_rate": 0.00010822663252240716, + "loss": 0.7619, + "step": 2869 + }, + { + "epoch": 2.3, + "learning_rate": 0.00010819462227912934, + "loss": 0.7463, + "step": 2870 + }, + { + "epoch": 2.3, + "learning_rate": 0.00010816261203585147, + "loss": 0.7136, + "step": 2871 + }, + { + "epoch": 2.3, + "learning_rate": 0.00010813060179257364, + "loss": 0.6622, + "step": 2872 + }, + { + "epoch": 2.3, + "learning_rate": 0.00010809859154929577, + "loss": 0.9716, + "step": 2873 + }, + { + "epoch": 2.3, + "learning_rate": 0.00010806658130601792, + "loss": 0.7546, + "step": 2874 + }, + { + "epoch": 2.3, + "learning_rate": 0.00010803457106274008, + "loss": 0.8198, + "step": 2875 + }, + { + "epoch": 2.3, + "learning_rate": 0.00010800256081946223, + "loss": 0.8527, + "step": 2876 + }, + { + "epoch": 2.3, + "learning_rate": 0.00010797055057618439, + "loss": 0.7693, + "step": 2877 + }, + { + "epoch": 2.3, + "learning_rate": 0.00010793854033290653, + "loss": 0.7703, + "step": 2878 + }, + { + "epoch": 2.3, + "learning_rate": 0.00010790653008962868, + "loss": 0.7257, + "step": 2879 + }, + { + "epoch": 2.3, + "learning_rate": 0.00010787451984635084, + "loss": 0.6584, + "step": 2880 + }, + { + "epoch": 2.3, + "learning_rate": 0.00010784250960307299, + "loss": 0.7969, + "step": 2881 + }, + { + "epoch": 2.31, + "learning_rate": 0.00010781049935979515, + "loss": 0.686, + "step": 2882 + }, + { + "epoch": 2.31, + "learning_rate": 0.00010777848911651729, + "loss": 0.789, + "step": 2883 + }, + { + "epoch": 2.31, + "learning_rate": 0.00010774647887323944, + "loss": 0.7876, + "step": 2884 + }, + { + "epoch": 2.31, + "learning_rate": 0.0001077144686299616, + "loss": 0.7464, + "step": 2885 + }, + { + "epoch": 2.31, + "learning_rate": 0.00010768245838668374, + "loss": 0.8391, + "step": 2886 + }, + { + "epoch": 2.31, + "learning_rate": 0.0001076504481434059, + "loss": 0.7214, + "step": 2887 + }, + { + "epoch": 2.31, + "learning_rate": 0.00010761843790012805, + "loss": 0.7708, + "step": 2888 + }, + { + "epoch": 2.31, + "learning_rate": 0.00010758642765685021, + "loss": 0.7889, + "step": 2889 + }, + { + "epoch": 2.31, + "learning_rate": 0.00010755441741357236, + "loss": 0.6609, + "step": 2890 + }, + { + "epoch": 2.31, + "learning_rate": 0.00010752240717029449, + "loss": 0.7613, + "step": 2891 + }, + { + "epoch": 2.31, + "learning_rate": 0.00010749039692701666, + "loss": 0.7319, + "step": 2892 + }, + { + "epoch": 2.31, + "learning_rate": 0.0001074583866837388, + "loss": 0.6815, + "step": 2893 + }, + { + "epoch": 2.32, + "learning_rate": 0.00010742637644046097, + "loss": 0.8365, + "step": 2894 + }, + { + "epoch": 2.32, + "learning_rate": 0.0001073943661971831, + "loss": 0.6658, + "step": 2895 + }, + { + "epoch": 2.32, + "learning_rate": 0.00010736235595390525, + "loss": 0.7065, + "step": 2896 + }, + { + "epoch": 2.32, + "learning_rate": 0.0001073303457106274, + "loss": 0.7503, + "step": 2897 + }, + { + "epoch": 2.32, + "learning_rate": 0.00010729833546734955, + "loss": 0.7549, + "step": 2898 + }, + { + "epoch": 2.32, + "learning_rate": 0.00010726632522407171, + "loss": 0.8223, + "step": 2899 + }, + { + "epoch": 2.32, + "learning_rate": 0.00010723431498079386, + "loss": 0.7683, + "step": 2900 + }, + { + "epoch": 2.32, + "learning_rate": 0.000107202304737516, + "loss": 0.7744, + "step": 2901 + }, + { + "epoch": 2.32, + "learning_rate": 0.00010717029449423816, + "loss": 0.7275, + "step": 2902 + }, + { + "epoch": 2.32, + "learning_rate": 0.00010713828425096031, + "loss": 0.8255, + "step": 2903 + }, + { + "epoch": 2.32, + "learning_rate": 0.00010710627400768247, + "loss": 0.7826, + "step": 2904 + }, + { + "epoch": 2.32, + "learning_rate": 0.00010707426376440462, + "loss": 0.7893, + "step": 2905 + }, + { + "epoch": 2.32, + "learning_rate": 0.00010704225352112676, + "loss": 0.7067, + "step": 2906 + }, + { + "epoch": 2.33, + "learning_rate": 0.00010701024327784892, + "loss": 0.7584, + "step": 2907 + }, + { + "epoch": 2.33, + "learning_rate": 0.00010697823303457107, + "loss": 0.7899, + "step": 2908 + }, + { + "epoch": 2.33, + "learning_rate": 0.00010694622279129323, + "loss": 0.7133, + "step": 2909 + }, + { + "epoch": 2.33, + "learning_rate": 0.00010691421254801538, + "loss": 0.663, + "step": 2910 + }, + { + "epoch": 2.33, + "learning_rate": 0.00010688220230473751, + "loss": 0.755, + "step": 2911 + }, + { + "epoch": 2.33, + "learning_rate": 0.00010685019206145968, + "loss": 0.7052, + "step": 2912 + }, + { + "epoch": 2.33, + "learning_rate": 0.00010681818181818181, + "loss": 0.6, + "step": 2913 + }, + { + "epoch": 2.33, + "learning_rate": 0.00010678617157490399, + "loss": 0.8111, + "step": 2914 + }, + { + "epoch": 2.33, + "learning_rate": 0.00010675416133162612, + "loss": 0.7169, + "step": 2915 + }, + { + "epoch": 2.33, + "learning_rate": 0.00010672215108834827, + "loss": 0.8247, + "step": 2916 + }, + { + "epoch": 2.33, + "learning_rate": 0.00010669014084507043, + "loss": 0.7434, + "step": 2917 + }, + { + "epoch": 2.33, + "learning_rate": 0.00010665813060179257, + "loss": 0.8262, + "step": 2918 + }, + { + "epoch": 2.34, + "learning_rate": 0.00010662612035851473, + "loss": 0.7125, + "step": 2919 + }, + { + "epoch": 2.34, + "learning_rate": 0.00010659411011523688, + "loss": 0.7373, + "step": 2920 + }, + { + "epoch": 2.34, + "learning_rate": 0.00010656209987195904, + "loss": 0.6206, + "step": 2921 + }, + { + "epoch": 2.34, + "learning_rate": 0.00010653008962868118, + "loss": 0.8749, + "step": 2922 + }, + { + "epoch": 2.34, + "learning_rate": 0.00010649807938540333, + "loss": 0.8677, + "step": 2923 + }, + { + "epoch": 2.34, + "learning_rate": 0.00010646606914212549, + "loss": 0.8175, + "step": 2924 + }, + { + "epoch": 2.34, + "learning_rate": 0.00010643405889884764, + "loss": 0.8014, + "step": 2925 + }, + { + "epoch": 2.34, + "learning_rate": 0.0001064020486555698, + "loss": 0.7386, + "step": 2926 + }, + { + "epoch": 2.34, + "learning_rate": 0.00010637003841229194, + "loss": 0.6876, + "step": 2927 + }, + { + "epoch": 2.34, + "learning_rate": 0.00010633802816901408, + "loss": 0.6543, + "step": 2928 + }, + { + "epoch": 2.34, + "learning_rate": 0.00010630601792573625, + "loss": 0.7554, + "step": 2929 + }, + { + "epoch": 2.34, + "learning_rate": 0.00010627400768245838, + "loss": 0.8674, + "step": 2930 + }, + { + "epoch": 2.34, + "learning_rate": 0.00010624199743918055, + "loss": 0.7695, + "step": 2931 + }, + { + "epoch": 2.35, + "learning_rate": 0.00010620998719590269, + "loss": 0.6313, + "step": 2932 + }, + { + "epoch": 2.35, + "learning_rate": 0.00010617797695262483, + "loss": 0.6583, + "step": 2933 + }, + { + "epoch": 2.35, + "learning_rate": 0.000106145966709347, + "loss": 0.8419, + "step": 2934 + }, + { + "epoch": 2.35, + "learning_rate": 0.00010611395646606914, + "loss": 0.6434, + "step": 2935 + }, + { + "epoch": 2.35, + "learning_rate": 0.00010608194622279131, + "loss": 0.6814, + "step": 2936 + }, + { + "epoch": 2.35, + "learning_rate": 0.00010604993597951345, + "loss": 0.7242, + "step": 2937 + }, + { + "epoch": 2.35, + "learning_rate": 0.00010601792573623559, + "loss": 0.7061, + "step": 2938 + }, + { + "epoch": 2.35, + "learning_rate": 0.00010598591549295775, + "loss": 0.6439, + "step": 2939 + }, + { + "epoch": 2.35, + "learning_rate": 0.0001059539052496799, + "loss": 0.7235, + "step": 2940 + }, + { + "epoch": 2.35, + "learning_rate": 0.00010592189500640206, + "loss": 0.7892, + "step": 2941 + }, + { + "epoch": 2.35, + "learning_rate": 0.0001058898847631242, + "loss": 0.7795, + "step": 2942 + }, + { + "epoch": 2.35, + "learning_rate": 0.00010585787451984635, + "loss": 0.7974, + "step": 2943 + }, + { + "epoch": 2.36, + "learning_rate": 0.00010582586427656851, + "loss": 0.7119, + "step": 2944 + }, + { + "epoch": 2.36, + "learning_rate": 0.00010579385403329066, + "loss": 0.7623, + "step": 2945 + }, + { + "epoch": 2.36, + "learning_rate": 0.00010576184379001282, + "loss": 0.8284, + "step": 2946 + }, + { + "epoch": 2.36, + "learning_rate": 0.00010572983354673496, + "loss": 0.6824, + "step": 2947 + }, + { + "epoch": 2.36, + "learning_rate": 0.00010569782330345712, + "loss": 0.8561, + "step": 2948 + }, + { + "epoch": 2.36, + "learning_rate": 0.00010566581306017927, + "loss": 0.7821, + "step": 2949 + }, + { + "epoch": 2.36, + "learning_rate": 0.0001056338028169014, + "loss": 0.585, + "step": 2950 + }, + { + "epoch": 2.36, + "learning_rate": 0.00010560179257362357, + "loss": 0.7845, + "step": 2951 + }, + { + "epoch": 2.36, + "learning_rate": 0.0001055697823303457, + "loss": 0.6917, + "step": 2952 + }, + { + "epoch": 2.36, + "learning_rate": 0.00010553777208706788, + "loss": 0.8157, + "step": 2953 + }, + { + "epoch": 2.36, + "learning_rate": 0.00010550576184379001, + "loss": 0.8446, + "step": 2954 + }, + { + "epoch": 2.36, + "learning_rate": 0.00010547375160051216, + "loss": 0.8268, + "step": 2955 + }, + { + "epoch": 2.36, + "learning_rate": 0.00010544174135723432, + "loss": 0.7445, + "step": 2956 + }, + { + "epoch": 2.37, + "learning_rate": 0.00010540973111395646, + "loss": 0.7484, + "step": 2957 + }, + { + "epoch": 2.37, + "learning_rate": 0.00010537772087067862, + "loss": 0.9661, + "step": 2958 + }, + { + "epoch": 2.37, + "learning_rate": 0.00010534571062740077, + "loss": 0.7076, + "step": 2959 + }, + { + "epoch": 2.37, + "learning_rate": 0.00010531370038412292, + "loss": 0.7548, + "step": 2960 + }, + { + "epoch": 2.37, + "learning_rate": 0.00010528169014084508, + "loss": 0.7675, + "step": 2961 + }, + { + "epoch": 2.37, + "learning_rate": 0.00010524967989756722, + "loss": 0.7624, + "step": 2962 + }, + { + "epoch": 2.37, + "learning_rate": 0.00010521766965428938, + "loss": 0.7903, + "step": 2963 + }, + { + "epoch": 2.37, + "learning_rate": 0.00010518565941101153, + "loss": 0.8342, + "step": 2964 + }, + { + "epoch": 2.37, + "learning_rate": 0.00010515364916773368, + "loss": 0.8473, + "step": 2965 + }, + { + "epoch": 2.37, + "learning_rate": 0.00010512163892445584, + "loss": 0.7703, + "step": 2966 + }, + { + "epoch": 2.37, + "learning_rate": 0.00010508962868117798, + "loss": 0.7901, + "step": 2967 + }, + { + "epoch": 2.37, + "learning_rate": 0.00010505761843790014, + "loss": 0.7052, + "step": 2968 + }, + { + "epoch": 2.38, + "learning_rate": 0.00010502560819462229, + "loss": 0.587, + "step": 2969 + }, + { + "epoch": 2.38, + "learning_rate": 0.00010499359795134442, + "loss": 0.7496, + "step": 2970 + }, + { + "epoch": 2.38, + "learning_rate": 0.0001049615877080666, + "loss": 0.7893, + "step": 2971 + }, + { + "epoch": 2.38, + "learning_rate": 0.00010492957746478873, + "loss": 0.8012, + "step": 2972 + }, + { + "epoch": 2.38, + "learning_rate": 0.0001048975672215109, + "loss": 0.8808, + "step": 2973 + }, + { + "epoch": 2.38, + "learning_rate": 0.00010486555697823303, + "loss": 0.7379, + "step": 2974 + }, + { + "epoch": 2.38, + "learning_rate": 0.0001048335467349552, + "loss": 0.7547, + "step": 2975 + }, + { + "epoch": 2.38, + "learning_rate": 0.00010480153649167734, + "loss": 0.7066, + "step": 2976 + }, + { + "epoch": 2.38, + "learning_rate": 0.00010476952624839948, + "loss": 0.5656, + "step": 2977 + }, + { + "epoch": 2.38, + "learning_rate": 0.00010473751600512164, + "loss": 0.7861, + "step": 2978 + }, + { + "epoch": 2.38, + "learning_rate": 0.00010470550576184379, + "loss": 0.804, + "step": 2979 + }, + { + "epoch": 2.38, + "learning_rate": 0.00010467349551856595, + "loss": 0.805, + "step": 2980 + }, + { + "epoch": 2.38, + "learning_rate": 0.0001046414852752881, + "loss": 0.7605, + "step": 2981 + }, + { + "epoch": 2.39, + "learning_rate": 0.00010460947503201024, + "loss": 0.6171, + "step": 2982 + }, + { + "epoch": 2.39, + "learning_rate": 0.0001045774647887324, + "loss": 0.8036, + "step": 2983 + }, + { + "epoch": 2.39, + "learning_rate": 0.00010454545454545455, + "loss": 0.7473, + "step": 2984 + }, + { + "epoch": 2.39, + "learning_rate": 0.00010451344430217671, + "loss": 0.7969, + "step": 2985 + }, + { + "epoch": 2.39, + "learning_rate": 0.00010448143405889885, + "loss": 0.6046, + "step": 2986 + }, + { + "epoch": 2.39, + "learning_rate": 0.000104449423815621, + "loss": 0.7979, + "step": 2987 + }, + { + "epoch": 2.39, + "learning_rate": 0.00010441741357234316, + "loss": 0.7891, + "step": 2988 + }, + { + "epoch": 2.39, + "learning_rate": 0.00010438540332906531, + "loss": 0.7196, + "step": 2989 + }, + { + "epoch": 2.39, + "learning_rate": 0.00010435339308578747, + "loss": 0.7584, + "step": 2990 + }, + { + "epoch": 2.39, + "learning_rate": 0.00010432138284250961, + "loss": 0.7585, + "step": 2991 + }, + { + "epoch": 2.39, + "learning_rate": 0.00010428937259923175, + "loss": 0.8006, + "step": 2992 + }, + { + "epoch": 2.39, + "learning_rate": 0.00010425736235595392, + "loss": 0.7747, + "step": 2993 + }, + { + "epoch": 2.4, + "learning_rate": 0.00010422535211267605, + "loss": 0.7553, + "step": 2994 + }, + { + "epoch": 2.4, + "learning_rate": 0.00010419334186939822, + "loss": 0.6685, + "step": 2995 + }, + { + "epoch": 2.4, + "learning_rate": 0.00010416133162612036, + "loss": 0.7339, + "step": 2996 + }, + { + "epoch": 2.4, + "learning_rate": 0.0001041293213828425, + "loss": 0.624, + "step": 2997 + }, + { + "epoch": 2.4, + "learning_rate": 0.00010409731113956466, + "loss": 0.838, + "step": 2998 + }, + { + "epoch": 2.4, + "learning_rate": 0.00010406530089628681, + "loss": 0.7045, + "step": 2999 + }, + { + "epoch": 2.4, + "learning_rate": 0.00010403329065300897, + "loss": 0.6755, + "step": 3000 + }, + { + "epoch": 2.4, + "learning_rate": 0.00010400128040973112, + "loss": 0.7125, + "step": 3001 + }, + { + "epoch": 2.4, + "learning_rate": 0.00010396927016645328, + "loss": 0.8702, + "step": 3002 + }, + { + "epoch": 2.4, + "learning_rate": 0.00010393725992317542, + "loss": 0.7519, + "step": 3003 + }, + { + "epoch": 2.4, + "learning_rate": 0.00010390524967989757, + "loss": 0.7437, + "step": 3004 + }, + { + "epoch": 2.4, + "learning_rate": 0.00010387323943661973, + "loss": 0.8127, + "step": 3005 + }, + { + "epoch": 2.4, + "learning_rate": 0.00010384122919334187, + "loss": 0.8399, + "step": 3006 + }, + { + "epoch": 2.41, + "learning_rate": 0.00010380921895006403, + "loss": 0.6314, + "step": 3007 + }, + { + "epoch": 2.41, + "learning_rate": 0.00010377720870678618, + "loss": 0.6732, + "step": 3008 + }, + { + "epoch": 2.41, + "learning_rate": 0.00010374519846350831, + "loss": 0.7058, + "step": 3009 + }, + { + "epoch": 2.41, + "learning_rate": 0.00010371318822023049, + "loss": 0.6973, + "step": 3010 + }, + { + "epoch": 2.41, + "learning_rate": 0.00010368117797695263, + "loss": 0.8507, + "step": 3011 + }, + { + "epoch": 2.41, + "learning_rate": 0.00010364916773367479, + "loss": 0.9196, + "step": 3012 + }, + { + "epoch": 2.41, + "learning_rate": 0.00010361715749039694, + "loss": 0.7347, + "step": 3013 + }, + { + "epoch": 2.41, + "learning_rate": 0.00010358514724711907, + "loss": 0.8507, + "step": 3014 + }, + { + "epoch": 2.41, + "learning_rate": 0.00010355313700384124, + "loss": 0.6584, + "step": 3015 + }, + { + "epoch": 2.41, + "learning_rate": 0.00010352112676056338, + "loss": 0.7154, + "step": 3016 + }, + { + "epoch": 2.41, + "learning_rate": 0.00010348911651728555, + "loss": 0.7608, + "step": 3017 + }, + { + "epoch": 2.41, + "learning_rate": 0.00010345710627400768, + "loss": 0.7605, + "step": 3018 + }, + { + "epoch": 2.42, + "learning_rate": 0.00010342509603072983, + "loss": 0.7796, + "step": 3019 + }, + { + "epoch": 2.42, + "learning_rate": 0.00010339308578745199, + "loss": 0.8512, + "step": 3020 + }, + { + "epoch": 2.42, + "learning_rate": 0.00010336107554417414, + "loss": 0.7987, + "step": 3021 + }, + { + "epoch": 2.42, + "learning_rate": 0.0001033290653008963, + "loss": 0.722, + "step": 3022 + }, + { + "epoch": 2.42, + "learning_rate": 0.00010329705505761844, + "loss": 0.8755, + "step": 3023 + }, + { + "epoch": 2.42, + "learning_rate": 0.00010326504481434059, + "loss": 0.6803, + "step": 3024 + }, + { + "epoch": 2.42, + "learning_rate": 0.00010323303457106275, + "loss": 0.7591, + "step": 3025 + }, + { + "epoch": 2.42, + "learning_rate": 0.0001032010243277849, + "loss": 0.6921, + "step": 3026 + }, + { + "epoch": 2.42, + "learning_rate": 0.00010316901408450705, + "loss": 0.7855, + "step": 3027 + }, + { + "epoch": 2.42, + "learning_rate": 0.0001031370038412292, + "loss": 0.8757, + "step": 3028 + }, + { + "epoch": 2.42, + "learning_rate": 0.00010310499359795136, + "loss": 0.72, + "step": 3029 + }, + { + "epoch": 2.42, + "learning_rate": 0.0001030729833546735, + "loss": 0.6634, + "step": 3030 + }, + { + "epoch": 2.42, + "learning_rate": 0.00010304097311139564, + "loss": 0.744, + "step": 3031 + }, + { + "epoch": 2.43, + "learning_rate": 0.00010300896286811781, + "loss": 0.7928, + "step": 3032 + }, + { + "epoch": 2.43, + "learning_rate": 0.00010297695262483994, + "loss": 0.7854, + "step": 3033 + }, + { + "epoch": 2.43, + "learning_rate": 0.00010294494238156212, + "loss": 0.8709, + "step": 3034 + }, + { + "epoch": 2.43, + "learning_rate": 0.00010291293213828425, + "loss": 0.8246, + "step": 3035 + }, + { + "epoch": 2.43, + "learning_rate": 0.0001028809218950064, + "loss": 0.8054, + "step": 3036 + }, + { + "epoch": 2.43, + "learning_rate": 0.00010284891165172857, + "loss": 0.7955, + "step": 3037 + }, + { + "epoch": 2.43, + "learning_rate": 0.0001028169014084507, + "loss": 0.6639, + "step": 3038 + }, + { + "epoch": 2.43, + "learning_rate": 0.00010278489116517288, + "loss": 0.6431, + "step": 3039 + }, + { + "epoch": 2.43, + "learning_rate": 0.00010275288092189501, + "loss": 0.7188, + "step": 3040 + }, + { + "epoch": 2.43, + "learning_rate": 0.00010272087067861715, + "loss": 0.7602, + "step": 3041 + }, + { + "epoch": 2.43, + "learning_rate": 0.00010268886043533931, + "loss": 0.6552, + "step": 3042 + }, + { + "epoch": 2.43, + "learning_rate": 0.00010265685019206146, + "loss": 0.76, + "step": 3043 + }, + { + "epoch": 2.44, + "learning_rate": 0.00010262483994878362, + "loss": 0.7439, + "step": 3044 + }, + { + "epoch": 2.44, + "learning_rate": 0.00010259282970550577, + "loss": 0.7147, + "step": 3045 + }, + { + "epoch": 2.44, + "learning_rate": 0.00010256081946222791, + "loss": 0.743, + "step": 3046 + }, + { + "epoch": 2.44, + "learning_rate": 0.00010252880921895007, + "loss": 0.6316, + "step": 3047 + }, + { + "epoch": 2.44, + "learning_rate": 0.00010249679897567222, + "loss": 0.8192, + "step": 3048 + }, + { + "epoch": 2.44, + "learning_rate": 0.00010246478873239438, + "loss": 0.6575, + "step": 3049 + }, + { + "epoch": 2.44, + "learning_rate": 0.00010243277848911653, + "loss": 0.7623, + "step": 3050 + }, + { + "epoch": 2.44, + "learning_rate": 0.00010240076824583866, + "loss": 0.8161, + "step": 3051 + }, + { + "epoch": 2.44, + "learning_rate": 0.00010236875800256083, + "loss": 0.7611, + "step": 3052 + }, + { + "epoch": 2.44, + "learning_rate": 0.00010233674775928296, + "loss": 0.7585, + "step": 3053 + }, + { + "epoch": 2.44, + "learning_rate": 0.00010230473751600514, + "loss": 0.6443, + "step": 3054 + }, + { + "epoch": 2.44, + "learning_rate": 0.00010227272727272727, + "loss": 0.6584, + "step": 3055 + }, + { + "epoch": 2.44, + "learning_rate": 0.00010224071702944942, + "loss": 0.6959, + "step": 3056 + }, + { + "epoch": 2.45, + "learning_rate": 0.00010220870678617158, + "loss": 0.7398, + "step": 3057 + }, + { + "epoch": 2.45, + "learning_rate": 0.00010217669654289372, + "loss": 0.766, + "step": 3058 + }, + { + "epoch": 2.45, + "learning_rate": 0.00010214468629961588, + "loss": 0.7159, + "step": 3059 + }, + { + "epoch": 2.45, + "learning_rate": 0.00010211267605633803, + "loss": 0.737, + "step": 3060 + }, + { + "epoch": 2.45, + "learning_rate": 0.00010208066581306019, + "loss": 0.7171, + "step": 3061 + }, + { + "epoch": 2.45, + "learning_rate": 0.00010204865556978233, + "loss": 0.7649, + "step": 3062 + }, + { + "epoch": 2.45, + "learning_rate": 0.00010201664532650448, + "loss": 0.799, + "step": 3063 + }, + { + "epoch": 2.45, + "learning_rate": 0.00010198463508322664, + "loss": 0.8145, + "step": 3064 + }, + { + "epoch": 2.45, + "learning_rate": 0.00010195262483994879, + "loss": 0.8177, + "step": 3065 + }, + { + "epoch": 2.45, + "learning_rate": 0.00010192061459667095, + "loss": 0.7402, + "step": 3066 + }, + { + "epoch": 2.45, + "learning_rate": 0.00010188860435339309, + "loss": 0.6594, + "step": 3067 + }, + { + "epoch": 2.45, + "learning_rate": 0.00010185659411011524, + "loss": 0.764, + "step": 3068 + }, + { + "epoch": 2.46, + "learning_rate": 0.0001018245838668374, + "loss": 0.7704, + "step": 3069 + }, + { + "epoch": 2.46, + "learning_rate": 0.00010179257362355954, + "loss": 0.8791, + "step": 3070 + }, + { + "epoch": 2.46, + "learning_rate": 0.0001017605633802817, + "loss": 0.7022, + "step": 3071 + }, + { + "epoch": 2.46, + "learning_rate": 0.00010172855313700385, + "loss": 0.6411, + "step": 3072 + }, + { + "epoch": 2.46, + "learning_rate": 0.00010169654289372598, + "loss": 0.8373, + "step": 3073 + }, + { + "epoch": 2.46, + "learning_rate": 0.00010166453265044816, + "loss": 0.8367, + "step": 3074 + }, + { + "epoch": 2.46, + "learning_rate": 0.00010163252240717029, + "loss": 0.7559, + "step": 3075 + }, + { + "epoch": 2.46, + "learning_rate": 0.00010160051216389246, + "loss": 0.6553, + "step": 3076 + }, + { + "epoch": 2.46, + "learning_rate": 0.0001015685019206146, + "loss": 0.8219, + "step": 3077 + }, + { + "epoch": 2.46, + "learning_rate": 0.00010153649167733674, + "loss": 0.7503, + "step": 3078 + }, + { + "epoch": 2.46, + "learning_rate": 0.0001015044814340589, + "loss": 0.797, + "step": 3079 + }, + { + "epoch": 2.46, + "learning_rate": 0.00010147247119078105, + "loss": 0.6492, + "step": 3080 + }, + { + "epoch": 2.46, + "learning_rate": 0.00010144046094750321, + "loss": 0.9552, + "step": 3081 + }, + { + "epoch": 2.47, + "learning_rate": 0.00010140845070422535, + "loss": 0.6169, + "step": 3082 + }, + { + "epoch": 2.47, + "learning_rate": 0.0001013764404609475, + "loss": 0.7344, + "step": 3083 + }, + { + "epoch": 2.47, + "learning_rate": 0.00010134443021766966, + "loss": 0.6645, + "step": 3084 + }, + { + "epoch": 2.47, + "learning_rate": 0.0001013124199743918, + "loss": 0.9632, + "step": 3085 + }, + { + "epoch": 2.47, + "learning_rate": 0.00010128040973111397, + "loss": 0.7757, + "step": 3086 + }, + { + "epoch": 2.47, + "learning_rate": 0.00010124839948783611, + "loss": 0.77, + "step": 3087 + }, + { + "epoch": 2.47, + "learning_rate": 0.00010121638924455827, + "loss": 0.6727, + "step": 3088 + }, + { + "epoch": 2.47, + "learning_rate": 0.00010118437900128042, + "loss": 0.5957, + "step": 3089 + }, + { + "epoch": 2.47, + "learning_rate": 0.00010115236875800256, + "loss": 0.683, + "step": 3090 + }, + { + "epoch": 2.47, + "learning_rate": 0.00010112035851472472, + "loss": 0.7904, + "step": 3091 + }, + { + "epoch": 2.47, + "learning_rate": 0.00010108834827144687, + "loss": 0.6627, + "step": 3092 + }, + { + "epoch": 2.47, + "learning_rate": 0.00010105633802816903, + "loss": 0.7328, + "step": 3093 + }, + { + "epoch": 2.48, + "learning_rate": 0.00010102432778489118, + "loss": 0.8373, + "step": 3094 + }, + { + "epoch": 2.48, + "learning_rate": 0.00010099231754161331, + "loss": 0.7284, + "step": 3095 + }, + { + "epoch": 2.48, + "learning_rate": 0.00010096030729833548, + "loss": 0.7506, + "step": 3096 + }, + { + "epoch": 2.48, + "learning_rate": 0.00010092829705505761, + "loss": 0.8394, + "step": 3097 + }, + { + "epoch": 2.48, + "learning_rate": 0.00010089628681177979, + "loss": 0.7253, + "step": 3098 + }, + { + "epoch": 2.48, + "learning_rate": 0.00010086427656850192, + "loss": 0.8019, + "step": 3099 + }, + { + "epoch": 2.48, + "learning_rate": 0.00010083226632522407, + "loss": 0.864, + "step": 3100 + }, + { + "epoch": 2.48, + "learning_rate": 0.00010080025608194623, + "loss": 0.827, + "step": 3101 + }, + { + "epoch": 2.48, + "learning_rate": 0.00010076824583866837, + "loss": 0.6554, + "step": 3102 + }, + { + "epoch": 2.48, + "learning_rate": 0.00010073623559539053, + "loss": 0.7624, + "step": 3103 + }, + { + "epoch": 2.48, + "learning_rate": 0.00010070422535211268, + "loss": 0.7467, + "step": 3104 + }, + { + "epoch": 2.48, + "learning_rate": 0.00010067221510883483, + "loss": 0.8381, + "step": 3105 + }, + { + "epoch": 2.48, + "learning_rate": 0.00010064020486555699, + "loss": 0.6798, + "step": 3106 + }, + { + "epoch": 2.49, + "learning_rate": 0.00010060819462227913, + "loss": 0.7463, + "step": 3107 + }, + { + "epoch": 2.49, + "learning_rate": 0.00010057618437900129, + "loss": 0.7776, + "step": 3108 + }, + { + "epoch": 2.49, + "learning_rate": 0.00010054417413572344, + "loss": 0.7464, + "step": 3109 + }, + { + "epoch": 2.49, + "learning_rate": 0.00010051216389244557, + "loss": 0.8203, + "step": 3110 + }, + { + "epoch": 2.49, + "learning_rate": 0.00010048015364916774, + "loss": 0.7048, + "step": 3111 + }, + { + "epoch": 2.49, + "learning_rate": 0.00010044814340588988, + "loss": 0.8408, + "step": 3112 + }, + { + "epoch": 2.49, + "learning_rate": 0.00010041613316261205, + "loss": 0.8052, + "step": 3113 + }, + { + "epoch": 2.49, + "learning_rate": 0.0001003841229193342, + "loss": 0.6494, + "step": 3114 + }, + { + "epoch": 2.49, + "learning_rate": 0.00010035211267605636, + "loss": 0.8301, + "step": 3115 + }, + { + "epoch": 2.49, + "learning_rate": 0.0001003201024327785, + "loss": 0.7847, + "step": 3116 + }, + { + "epoch": 2.49, + "learning_rate": 0.00010028809218950063, + "loss": 0.8566, + "step": 3117 + }, + { + "epoch": 2.49, + "learning_rate": 0.00010025608194622281, + "loss": 0.6464, + "step": 3118 + }, + { + "epoch": 2.5, + "learning_rate": 0.00010022407170294494, + "loss": 0.6818, + "step": 3119 + }, + { + "epoch": 2.5, + "learning_rate": 0.00010019206145966711, + "loss": 0.7421, + "step": 3120 + }, + { + "epoch": 2.5, + "learning_rate": 0.00010016005121638925, + "loss": 0.7746, + "step": 3121 + }, + { + "epoch": 2.5, + "learning_rate": 0.00010012804097311139, + "loss": 0.679, + "step": 3122 + }, + { + "epoch": 2.5, + "learning_rate": 0.00010009603072983355, + "loss": 0.8434, + "step": 3123 + }, + { + "epoch": 2.5, + "learning_rate": 0.0001000640204865557, + "loss": 0.7942, + "step": 3124 + }, + { + "epoch": 2.5, + "learning_rate": 0.00010003201024327786, + "loss": 0.919, + "step": 3125 + }, + { + "epoch": 2.5, + "learning_rate": 0.0001, + "loss": 0.7921, + "step": 3126 + }, + { + "epoch": 2.5, + "learning_rate": 9.996798975672216e-05, + "loss": 0.7138, + "step": 3127 + }, + { + "epoch": 2.5, + "learning_rate": 9.993597951344431e-05, + "loss": 0.7953, + "step": 3128 + }, + { + "epoch": 2.5, + "learning_rate": 9.990396927016646e-05, + "loss": 0.7121, + "step": 3129 + }, + { + "epoch": 2.5, + "learning_rate": 9.98719590268886e-05, + "loss": 0.6707, + "step": 3130 + }, + { + "epoch": 2.5, + "learning_rate": 9.983994878361076e-05, + "loss": 0.6766, + "step": 3131 + }, + { + "epoch": 2.51, + "learning_rate": 9.980793854033291e-05, + "loss": 0.7304, + "step": 3132 + }, + { + "epoch": 2.51, + "learning_rate": 9.977592829705507e-05, + "loss": 0.7607, + "step": 3133 + }, + { + "epoch": 2.51, + "learning_rate": 9.974391805377722e-05, + "loss": 0.6566, + "step": 3134 + }, + { + "epoch": 2.51, + "learning_rate": 9.971190781049936e-05, + "loss": 0.7113, + "step": 3135 + }, + { + "epoch": 2.51, + "learning_rate": 9.967989756722151e-05, + "loss": 0.7325, + "step": 3136 + }, + { + "epoch": 2.51, + "learning_rate": 9.964788732394367e-05, + "loss": 0.758, + "step": 3137 + }, + { + "epoch": 2.51, + "learning_rate": 9.961587708066581e-05, + "loss": 0.7019, + "step": 3138 + }, + { + "epoch": 2.51, + "learning_rate": 9.958386683738797e-05, + "loss": 0.9103, + "step": 3139 + }, + { + "epoch": 2.51, + "learning_rate": 9.955185659411012e-05, + "loss": 0.9091, + "step": 3140 + }, + { + "epoch": 2.51, + "learning_rate": 9.951984635083227e-05, + "loss": 0.7263, + "step": 3141 + }, + { + "epoch": 2.51, + "learning_rate": 9.948783610755443e-05, + "loss": 0.7479, + "step": 3142 + }, + { + "epoch": 2.51, + "learning_rate": 9.945582586427657e-05, + "loss": 0.7853, + "step": 3143 + }, + { + "epoch": 2.52, + "learning_rate": 9.942381562099873e-05, + "loss": 0.8859, + "step": 3144 + }, + { + "epoch": 2.52, + "learning_rate": 9.939180537772086e-05, + "loss": 0.6567, + "step": 3145 + }, + { + "epoch": 2.52, + "learning_rate": 9.935979513444302e-05, + "loss": 0.7457, + "step": 3146 + }, + { + "epoch": 2.52, + "learning_rate": 9.932778489116517e-05, + "loss": 0.788, + "step": 3147 + }, + { + "epoch": 2.52, + "learning_rate": 9.929577464788733e-05, + "loss": 0.7907, + "step": 3148 + }, + { + "epoch": 2.52, + "learning_rate": 9.926376440460948e-05, + "loss": 0.8579, + "step": 3149 + }, + { + "epoch": 2.52, + "learning_rate": 9.923175416133164e-05, + "loss": 0.8234, + "step": 3150 + }, + { + "epoch": 2.52, + "learning_rate": 9.919974391805378e-05, + "loss": 0.813, + "step": 3151 + }, + { + "epoch": 2.52, + "learning_rate": 9.916773367477593e-05, + "loss": 0.8119, + "step": 3152 + }, + { + "epoch": 2.52, + "learning_rate": 9.913572343149809e-05, + "loss": 0.7834, + "step": 3153 + }, + { + "epoch": 2.52, + "learning_rate": 9.910371318822023e-05, + "loss": 0.7309, + "step": 3154 + }, + { + "epoch": 2.52, + "learning_rate": 9.90717029449424e-05, + "loss": 0.7642, + "step": 3155 + }, + { + "epoch": 2.52, + "learning_rate": 9.903969270166453e-05, + "loss": 0.752, + "step": 3156 + }, + { + "epoch": 2.53, + "learning_rate": 9.900768245838669e-05, + "loss": 0.7264, + "step": 3157 + }, + { + "epoch": 2.53, + "learning_rate": 9.897567221510883e-05, + "loss": 0.7314, + "step": 3158 + }, + { + "epoch": 2.53, + "learning_rate": 9.894366197183099e-05, + "loss": 0.8204, + "step": 3159 + }, + { + "epoch": 2.53, + "learning_rate": 9.891165172855314e-05, + "loss": 0.7574, + "step": 3160 + }, + { + "epoch": 2.53, + "learning_rate": 9.88796414852753e-05, + "loss": 0.7494, + "step": 3161 + }, + { + "epoch": 2.53, + "learning_rate": 9.884763124199745e-05, + "loss": 0.835, + "step": 3162 + }, + { + "epoch": 2.53, + "learning_rate": 9.881562099871959e-05, + "loss": 0.7666, + "step": 3163 + }, + { + "epoch": 2.53, + "learning_rate": 9.878361075544175e-05, + "loss": 0.6899, + "step": 3164 + }, + { + "epoch": 2.53, + "learning_rate": 9.87516005121639e-05, + "loss": 0.8076, + "step": 3165 + }, + { + "epoch": 2.53, + "learning_rate": 9.871959026888606e-05, + "loss": 0.8082, + "step": 3166 + }, + { + "epoch": 2.53, + "learning_rate": 9.868758002560819e-05, + "loss": 0.7648, + "step": 3167 + }, + { + "epoch": 2.53, + "learning_rate": 9.865556978233035e-05, + "loss": 0.7868, + "step": 3168 + }, + { + "epoch": 2.54, + "learning_rate": 9.86235595390525e-05, + "loss": 0.7117, + "step": 3169 + }, + { + "epoch": 2.54, + "learning_rate": 9.859154929577466e-05, + "loss": 0.775, + "step": 3170 + }, + { + "epoch": 2.54, + "learning_rate": 9.85595390524968e-05, + "loss": 0.8082, + "step": 3171 + }, + { + "epoch": 2.54, + "learning_rate": 9.852752880921895e-05, + "loss": 0.6562, + "step": 3172 + }, + { + "epoch": 2.54, + "learning_rate": 9.849551856594111e-05, + "loss": 0.6746, + "step": 3173 + }, + { + "epoch": 2.54, + "learning_rate": 9.846350832266325e-05, + "loss": 0.7261, + "step": 3174 + }, + { + "epoch": 2.54, + "learning_rate": 9.843149807938541e-05, + "loss": 0.7227, + "step": 3175 + }, + { + "epoch": 2.54, + "learning_rate": 9.839948783610756e-05, + "loss": 0.7806, + "step": 3176 + }, + { + "epoch": 2.54, + "learning_rate": 9.836747759282972e-05, + "loss": 0.7027, + "step": 3177 + }, + { + "epoch": 2.54, + "learning_rate": 9.833546734955185e-05, + "loss": 0.6752, + "step": 3178 + }, + { + "epoch": 2.54, + "learning_rate": 9.830345710627401e-05, + "loss": 0.6945, + "step": 3179 + }, + { + "epoch": 2.54, + "learning_rate": 9.827144686299616e-05, + "loss": 0.7129, + "step": 3180 + }, + { + "epoch": 2.54, + "learning_rate": 9.823943661971832e-05, + "loss": 0.8153, + "step": 3181 + }, + { + "epoch": 2.55, + "learning_rate": 9.820742637644046e-05, + "loss": 0.6587, + "step": 3182 + }, + { + "epoch": 2.55, + "learning_rate": 9.817541613316261e-05, + "loss": 0.7747, + "step": 3183 + }, + { + "epoch": 2.55, + "learning_rate": 9.814340588988477e-05, + "loss": 0.6913, + "step": 3184 + }, + { + "epoch": 2.55, + "learning_rate": 9.811139564660692e-05, + "loss": 0.7571, + "step": 3185 + }, + { + "epoch": 2.55, + "learning_rate": 9.807938540332908e-05, + "loss": 0.8504, + "step": 3186 + }, + { + "epoch": 2.55, + "learning_rate": 9.804737516005122e-05, + "loss": 0.9203, + "step": 3187 + }, + { + "epoch": 2.55, + "learning_rate": 9.801536491677337e-05, + "loss": 0.6594, + "step": 3188 + }, + { + "epoch": 2.55, + "learning_rate": 9.798335467349552e-05, + "loss": 0.8461, + "step": 3189 + }, + { + "epoch": 2.55, + "learning_rate": 9.795134443021768e-05, + "loss": 0.7134, + "step": 3190 + }, + { + "epoch": 2.55, + "learning_rate": 9.791933418693982e-05, + "loss": 0.7093, + "step": 3191 + }, + { + "epoch": 2.55, + "learning_rate": 9.788732394366198e-05, + "loss": 0.7466, + "step": 3192 + }, + { + "epoch": 2.55, + "learning_rate": 9.785531370038413e-05, + "loss": 0.7291, + "step": 3193 + }, + { + "epoch": 2.56, + "learning_rate": 9.782330345710627e-05, + "loss": 0.8144, + "step": 3194 + }, + { + "epoch": 2.56, + "learning_rate": 9.779129321382843e-05, + "loss": 0.759, + "step": 3195 + }, + { + "epoch": 2.56, + "learning_rate": 9.775928297055058e-05, + "loss": 0.7432, + "step": 3196 + }, + { + "epoch": 2.56, + "learning_rate": 9.772727272727274e-05, + "loss": 0.8008, + "step": 3197 + }, + { + "epoch": 2.56, + "learning_rate": 9.769526248399489e-05, + "loss": 0.7309, + "step": 3198 + }, + { + "epoch": 2.56, + "learning_rate": 9.766325224071703e-05, + "loss": 0.7451, + "step": 3199 + }, + { + "epoch": 2.56, + "learning_rate": 9.763124199743918e-05, + "loss": 0.7399, + "step": 3200 + }, + { + "epoch": 2.56, + "learning_rate": 9.759923175416134e-05, + "loss": 0.7847, + "step": 3201 + }, + { + "epoch": 2.56, + "learning_rate": 9.756722151088348e-05, + "loss": 0.7251, + "step": 3202 + }, + { + "epoch": 2.56, + "learning_rate": 9.753521126760564e-05, + "loss": 0.6384, + "step": 3203 + }, + { + "epoch": 2.56, + "learning_rate": 9.750320102432779e-05, + "loss": 0.7483, + "step": 3204 + }, + { + "epoch": 2.56, + "learning_rate": 9.747119078104994e-05, + "loss": 0.7533, + "step": 3205 + }, + { + "epoch": 2.56, + "learning_rate": 9.74391805377721e-05, + "loss": 0.811, + "step": 3206 + }, + { + "epoch": 2.57, + "learning_rate": 9.740717029449424e-05, + "loss": 0.7699, + "step": 3207 + }, + { + "epoch": 2.57, + "learning_rate": 9.73751600512164e-05, + "loss": 0.7438, + "step": 3208 + }, + { + "epoch": 2.57, + "learning_rate": 9.734314980793855e-05, + "loss": 0.7137, + "step": 3209 + }, + { + "epoch": 2.57, + "learning_rate": 9.73111395646607e-05, + "loss": 0.7488, + "step": 3210 + }, + { + "epoch": 2.57, + "learning_rate": 9.727912932138284e-05, + "loss": 0.7743, + "step": 3211 + }, + { + "epoch": 2.57, + "learning_rate": 9.7247119078105e-05, + "loss": 0.7308, + "step": 3212 + }, + { + "epoch": 2.57, + "learning_rate": 9.721510883482715e-05, + "loss": 0.6731, + "step": 3213 + }, + { + "epoch": 2.57, + "learning_rate": 9.718309859154931e-05, + "loss": 0.8083, + "step": 3214 + }, + { + "epoch": 2.57, + "learning_rate": 9.715108834827144e-05, + "loss": 0.6929, + "step": 3215 + }, + { + "epoch": 2.57, + "learning_rate": 9.71190781049936e-05, + "loss": 0.7657, + "step": 3216 + }, + { + "epoch": 2.57, + "learning_rate": 9.708706786171576e-05, + "loss": 0.7774, + "step": 3217 + }, + { + "epoch": 2.57, + "learning_rate": 9.70550576184379e-05, + "loss": 0.7326, + "step": 3218 + }, + { + "epoch": 2.58, + "learning_rate": 9.702304737516007e-05, + "loss": 0.7106, + "step": 3219 + }, + { + "epoch": 2.58, + "learning_rate": 9.699103713188221e-05, + "loss": 0.7056, + "step": 3220 + }, + { + "epoch": 2.58, + "learning_rate": 9.695902688860436e-05, + "loss": 0.6642, + "step": 3221 + }, + { + "epoch": 2.58, + "learning_rate": 9.69270166453265e-05, + "loss": 0.7049, + "step": 3222 + }, + { + "epoch": 2.58, + "learning_rate": 9.689500640204866e-05, + "loss": 0.8625, + "step": 3223 + }, + { + "epoch": 2.58, + "learning_rate": 9.686299615877081e-05, + "loss": 0.6616, + "step": 3224 + }, + { + "epoch": 2.58, + "learning_rate": 9.683098591549297e-05, + "loss": 0.73, + "step": 3225 + }, + { + "epoch": 2.58, + "learning_rate": 9.67989756722151e-05, + "loss": 0.6945, + "step": 3226 + }, + { + "epoch": 2.58, + "learning_rate": 9.676696542893726e-05, + "loss": 0.816, + "step": 3227 + }, + { + "epoch": 2.58, + "learning_rate": 9.673495518565941e-05, + "loss": 0.766, + "step": 3228 + }, + { + "epoch": 2.58, + "learning_rate": 9.670294494238157e-05, + "loss": 0.7024, + "step": 3229 + }, + { + "epoch": 2.58, + "learning_rate": 9.667093469910373e-05, + "loss": 0.7118, + "step": 3230 + }, + { + "epoch": 2.58, + "learning_rate": 9.663892445582587e-05, + "loss": 0.9246, + "step": 3231 + }, + { + "epoch": 2.59, + "learning_rate": 9.660691421254802e-05, + "loss": 0.7255, + "step": 3232 + }, + { + "epoch": 2.59, + "learning_rate": 9.657490396927017e-05, + "loss": 0.7413, + "step": 3233 + }, + { + "epoch": 2.59, + "learning_rate": 9.654289372599233e-05, + "loss": 0.8422, + "step": 3234 + }, + { + "epoch": 2.59, + "learning_rate": 9.651088348271447e-05, + "loss": 0.7779, + "step": 3235 + }, + { + "epoch": 2.59, + "learning_rate": 9.647887323943663e-05, + "loss": 0.7067, + "step": 3236 + }, + { + "epoch": 2.59, + "learning_rate": 9.644686299615877e-05, + "loss": 0.7671, + "step": 3237 + }, + { + "epoch": 2.59, + "learning_rate": 9.641485275288092e-05, + "loss": 0.6652, + "step": 3238 + }, + { + "epoch": 2.59, + "learning_rate": 9.638284250960307e-05, + "loss": 0.7769, + "step": 3239 + }, + { + "epoch": 2.59, + "learning_rate": 9.635083226632523e-05, + "loss": 0.7696, + "step": 3240 + }, + { + "epoch": 2.59, + "learning_rate": 9.631882202304738e-05, + "loss": 0.8159, + "step": 3241 + }, + { + "epoch": 2.59, + "learning_rate": 9.628681177976952e-05, + "loss": 0.7393, + "step": 3242 + }, + { + "epoch": 2.59, + "learning_rate": 9.625480153649168e-05, + "loss": 0.6135, + "step": 3243 + }, + { + "epoch": 2.6, + "learning_rate": 9.622279129321383e-05, + "loss": 0.6894, + "step": 3244 + }, + { + "epoch": 2.6, + "learning_rate": 9.619078104993599e-05, + "loss": 0.7813, + "step": 3245 + }, + { + "epoch": 2.6, + "learning_rate": 9.615877080665814e-05, + "loss": 0.8276, + "step": 3246 + }, + { + "epoch": 2.6, + "learning_rate": 9.61267605633803e-05, + "loss": 0.7217, + "step": 3247 + }, + { + "epoch": 2.6, + "learning_rate": 9.609475032010243e-05, + "loss": 0.7306, + "step": 3248 + }, + { + "epoch": 2.6, + "learning_rate": 9.606274007682459e-05, + "loss": 0.622, + "step": 3249 + }, + { + "epoch": 2.6, + "learning_rate": 9.603072983354673e-05, + "loss": 0.6793, + "step": 3250 + }, + { + "epoch": 2.6, + "learning_rate": 9.59987195902689e-05, + "loss": 0.752, + "step": 3251 + }, + { + "epoch": 2.6, + "learning_rate": 9.596670934699104e-05, + "loss": 0.6745, + "step": 3252 + }, + { + "epoch": 2.6, + "learning_rate": 9.593469910371319e-05, + "loss": 0.92, + "step": 3253 + }, + { + "epoch": 2.6, + "learning_rate": 9.590268886043535e-05, + "loss": 0.6253, + "step": 3254 + }, + { + "epoch": 2.6, + "learning_rate": 9.587067861715749e-05, + "loss": 0.7764, + "step": 3255 + }, + { + "epoch": 2.6, + "learning_rate": 9.583866837387965e-05, + "loss": 0.7902, + "step": 3256 + }, + { + "epoch": 2.61, + "learning_rate": 9.58066581306018e-05, + "loss": 0.7994, + "step": 3257 + }, + { + "epoch": 2.61, + "learning_rate": 9.577464788732394e-05, + "loss": 0.6058, + "step": 3258 + }, + { + "epoch": 2.61, + "learning_rate": 9.574263764404609e-05, + "loss": 0.7169, + "step": 3259 + }, + { + "epoch": 2.61, + "learning_rate": 9.571062740076825e-05, + "loss": 0.7929, + "step": 3260 + }, + { + "epoch": 2.61, + "learning_rate": 9.56786171574904e-05, + "loss": 0.8975, + "step": 3261 + }, + { + "epoch": 2.61, + "learning_rate": 9.564660691421256e-05, + "loss": 0.9405, + "step": 3262 + }, + { + "epoch": 2.61, + "learning_rate": 9.56145966709347e-05, + "loss": 0.6401, + "step": 3263 + }, + { + "epoch": 2.61, + "learning_rate": 9.558258642765685e-05, + "loss": 0.7699, + "step": 3264 + }, + { + "epoch": 2.61, + "learning_rate": 9.555057618437901e-05, + "loss": 0.6407, + "step": 3265 + }, + { + "epoch": 2.61, + "learning_rate": 9.551856594110115e-05, + "loss": 0.8884, + "step": 3266 + }, + { + "epoch": 2.61, + "learning_rate": 9.548655569782331e-05, + "loss": 0.773, + "step": 3267 + }, + { + "epoch": 2.61, + "learning_rate": 9.545454545454546e-05, + "loss": 0.9361, + "step": 3268 + }, + { + "epoch": 2.62, + "learning_rate": 9.542253521126761e-05, + "loss": 0.7068, + "step": 3269 + }, + { + "epoch": 2.62, + "learning_rate": 9.539052496798975e-05, + "loss": 0.888, + "step": 3270 + }, + { + "epoch": 2.62, + "learning_rate": 9.535851472471191e-05, + "loss": 0.7185, + "step": 3271 + }, + { + "epoch": 2.62, + "learning_rate": 9.532650448143406e-05, + "loss": 0.7151, + "step": 3272 + }, + { + "epoch": 2.62, + "learning_rate": 9.529449423815622e-05, + "loss": 0.7069, + "step": 3273 + }, + { + "epoch": 2.62, + "learning_rate": 9.526248399487837e-05, + "loss": 0.7717, + "step": 3274 + }, + { + "epoch": 2.62, + "learning_rate": 9.523047375160051e-05, + "loss": 0.7086, + "step": 3275 + }, + { + "epoch": 2.62, + "learning_rate": 9.519846350832267e-05, + "loss": 0.7758, + "step": 3276 + }, + { + "epoch": 2.62, + "learning_rate": 9.516645326504482e-05, + "loss": 0.7608, + "step": 3277 + }, + { + "epoch": 2.62, + "learning_rate": 9.513444302176698e-05, + "loss": 0.6706, + "step": 3278 + }, + { + "epoch": 2.62, + "learning_rate": 9.510243277848912e-05, + "loss": 0.6834, + "step": 3279 + }, + { + "epoch": 2.62, + "learning_rate": 9.507042253521127e-05, + "loss": 0.6608, + "step": 3280 + }, + { + "epoch": 2.62, + "learning_rate": 9.503841229193342e-05, + "loss": 0.6952, + "step": 3281 + }, + { + "epoch": 2.63, + "learning_rate": 9.500640204865558e-05, + "loss": 0.6871, + "step": 3282 + }, + { + "epoch": 2.63, + "learning_rate": 9.497439180537772e-05, + "loss": 0.7976, + "step": 3283 + }, + { + "epoch": 2.63, + "learning_rate": 9.494238156209988e-05, + "loss": 0.7723, + "step": 3284 + }, + { + "epoch": 2.63, + "learning_rate": 9.491037131882203e-05, + "loss": 0.6766, + "step": 3285 + }, + { + "epoch": 2.63, + "learning_rate": 9.487836107554417e-05, + "loss": 0.7226, + "step": 3286 + }, + { + "epoch": 2.63, + "learning_rate": 9.484635083226633e-05, + "loss": 0.658, + "step": 3287 + }, + { + "epoch": 2.63, + "learning_rate": 9.481434058898848e-05, + "loss": 0.734, + "step": 3288 + }, + { + "epoch": 2.63, + "learning_rate": 9.478233034571064e-05, + "loss": 0.7734, + "step": 3289 + }, + { + "epoch": 2.63, + "learning_rate": 9.475032010243279e-05, + "loss": 0.7657, + "step": 3290 + }, + { + "epoch": 2.63, + "learning_rate": 9.471830985915493e-05, + "loss": 0.7224, + "step": 3291 + }, + { + "epoch": 2.63, + "learning_rate": 9.468629961587708e-05, + "loss": 0.8735, + "step": 3292 + }, + { + "epoch": 2.63, + "learning_rate": 9.465428937259924e-05, + "loss": 0.7189, + "step": 3293 + }, + { + "epoch": 2.64, + "learning_rate": 9.462227912932138e-05, + "loss": 0.7735, + "step": 3294 + }, + { + "epoch": 2.64, + "learning_rate": 9.459026888604354e-05, + "loss": 0.7231, + "step": 3295 + }, + { + "epoch": 2.64, + "learning_rate": 9.455825864276569e-05, + "loss": 0.7243, + "step": 3296 + }, + { + "epoch": 2.64, + "learning_rate": 9.452624839948784e-05, + "loss": 0.8121, + "step": 3297 + }, + { + "epoch": 2.64, + "learning_rate": 9.449423815621e-05, + "loss": 0.8405, + "step": 3298 + }, + { + "epoch": 2.64, + "learning_rate": 9.446222791293214e-05, + "loss": 0.701, + "step": 3299 + }, + { + "epoch": 2.64, + "learning_rate": 9.44302176696543e-05, + "loss": 0.7691, + "step": 3300 + }, + { + "epoch": 2.64, + "learning_rate": 9.439820742637645e-05, + "loss": 0.7158, + "step": 3301 + }, + { + "epoch": 2.64, + "learning_rate": 9.43661971830986e-05, + "loss": 0.8439, + "step": 3302 + }, + { + "epoch": 2.64, + "learning_rate": 9.433418693982074e-05, + "loss": 0.6681, + "step": 3303 + }, + { + "epoch": 2.64, + "learning_rate": 9.43021766965429e-05, + "loss": 0.6909, + "step": 3304 + }, + { + "epoch": 2.64, + "learning_rate": 9.427016645326505e-05, + "loss": 0.7372, + "step": 3305 + }, + { + "epoch": 2.64, + "learning_rate": 9.423815620998721e-05, + "loss": 0.7026, + "step": 3306 + }, + { + "epoch": 2.65, + "learning_rate": 9.420614596670935e-05, + "loss": 0.7896, + "step": 3307 + }, + { + "epoch": 2.65, + "learning_rate": 9.41741357234315e-05, + "loss": 0.7206, + "step": 3308 + }, + { + "epoch": 2.65, + "learning_rate": 9.414212548015366e-05, + "loss": 0.7918, + "step": 3309 + }, + { + "epoch": 2.65, + "learning_rate": 9.41101152368758e-05, + "loss": 0.8121, + "step": 3310 + }, + { + "epoch": 2.65, + "learning_rate": 9.407810499359797e-05, + "loss": 0.7619, + "step": 3311 + }, + { + "epoch": 2.65, + "learning_rate": 9.40460947503201e-05, + "loss": 0.6473, + "step": 3312 + }, + { + "epoch": 2.65, + "learning_rate": 9.401408450704226e-05, + "loss": 0.6538, + "step": 3313 + }, + { + "epoch": 2.65, + "learning_rate": 9.39820742637644e-05, + "loss": 0.6144, + "step": 3314 + }, + { + "epoch": 2.65, + "learning_rate": 9.395006402048656e-05, + "loss": 0.6979, + "step": 3315 + }, + { + "epoch": 2.65, + "learning_rate": 9.391805377720871e-05, + "loss": 0.8324, + "step": 3316 + }, + { + "epoch": 2.65, + "learning_rate": 9.388604353393087e-05, + "loss": 0.7983, + "step": 3317 + }, + { + "epoch": 2.65, + "learning_rate": 9.3854033290653e-05, + "loss": 0.7099, + "step": 3318 + }, + { + "epoch": 2.66, + "learning_rate": 9.382202304737516e-05, + "loss": 0.8352, + "step": 3319 + }, + { + "epoch": 2.66, + "learning_rate": 9.379001280409732e-05, + "loss": 0.678, + "step": 3320 + }, + { + "epoch": 2.66, + "learning_rate": 9.375800256081947e-05, + "loss": 0.6794, + "step": 3321 + }, + { + "epoch": 2.66, + "learning_rate": 9.372599231754163e-05, + "loss": 0.7741, + "step": 3322 + }, + { + "epoch": 2.66, + "learning_rate": 9.369398207426376e-05, + "loss": 0.744, + "step": 3323 + }, + { + "epoch": 2.66, + "learning_rate": 9.366197183098592e-05, + "loss": 0.635, + "step": 3324 + }, + { + "epoch": 2.66, + "learning_rate": 9.362996158770807e-05, + "loss": 0.8191, + "step": 3325 + }, + { + "epoch": 2.66, + "learning_rate": 9.359795134443023e-05, + "loss": 0.7491, + "step": 3326 + }, + { + "epoch": 2.66, + "learning_rate": 9.356594110115237e-05, + "loss": 0.7097, + "step": 3327 + }, + { + "epoch": 2.66, + "learning_rate": 9.353393085787452e-05, + "loss": 0.8563, + "step": 3328 + }, + { + "epoch": 2.66, + "learning_rate": 9.350192061459667e-05, + "loss": 0.7315, + "step": 3329 + }, + { + "epoch": 2.66, + "learning_rate": 9.346991037131883e-05, + "loss": 0.8195, + "step": 3330 + }, + { + "epoch": 2.66, + "learning_rate": 9.343790012804097e-05, + "loss": 0.7377, + "step": 3331 + }, + { + "epoch": 2.67, + "learning_rate": 9.340588988476313e-05, + "loss": 0.7839, + "step": 3332 + }, + { + "epoch": 2.67, + "learning_rate": 9.337387964148528e-05, + "loss": 0.7064, + "step": 3333 + }, + { + "epoch": 2.67, + "learning_rate": 9.334186939820742e-05, + "loss": 0.787, + "step": 3334 + }, + { + "epoch": 2.67, + "learning_rate": 9.330985915492958e-05, + "loss": 0.9049, + "step": 3335 + }, + { + "epoch": 2.67, + "learning_rate": 9.327784891165173e-05, + "loss": 0.7069, + "step": 3336 + }, + { + "epoch": 2.67, + "learning_rate": 9.324583866837389e-05, + "loss": 0.7193, + "step": 3337 + }, + { + "epoch": 2.67, + "learning_rate": 9.321382842509604e-05, + "loss": 0.6776, + "step": 3338 + }, + { + "epoch": 2.67, + "learning_rate": 9.318181818181818e-05, + "loss": 0.7164, + "step": 3339 + }, + { + "epoch": 2.67, + "learning_rate": 9.314980793854033e-05, + "loss": 0.7305, + "step": 3340 + }, + { + "epoch": 2.67, + "learning_rate": 9.311779769526249e-05, + "loss": 0.6901, + "step": 3341 + }, + { + "epoch": 2.67, + "learning_rate": 9.308578745198463e-05, + "loss": 0.709, + "step": 3342 + }, + { + "epoch": 2.67, + "learning_rate": 9.30537772087068e-05, + "loss": 0.7376, + "step": 3343 + }, + { + "epoch": 2.68, + "learning_rate": 9.302176696542894e-05, + "loss": 0.7679, + "step": 3344 + }, + { + "epoch": 2.68, + "learning_rate": 9.298975672215109e-05, + "loss": 0.8033, + "step": 3345 + }, + { + "epoch": 2.68, + "learning_rate": 9.295774647887325e-05, + "loss": 0.5762, + "step": 3346 + }, + { + "epoch": 2.68, + "learning_rate": 9.292573623559539e-05, + "loss": 0.569, + "step": 3347 + }, + { + "epoch": 2.68, + "learning_rate": 9.289372599231755e-05, + "loss": 0.7459, + "step": 3348 + }, + { + "epoch": 2.68, + "learning_rate": 9.28617157490397e-05, + "loss": 0.703, + "step": 3349 + }, + { + "epoch": 2.68, + "learning_rate": 9.282970550576185e-05, + "loss": 0.8826, + "step": 3350 + }, + { + "epoch": 2.68, + "learning_rate": 9.279769526248399e-05, + "loss": 0.7886, + "step": 3351 + }, + { + "epoch": 2.68, + "learning_rate": 9.276568501920615e-05, + "loss": 0.8043, + "step": 3352 + }, + { + "epoch": 2.68, + "learning_rate": 9.27336747759283e-05, + "loss": 0.751, + "step": 3353 + }, + { + "epoch": 2.68, + "learning_rate": 9.270166453265046e-05, + "loss": 0.7804, + "step": 3354 + }, + { + "epoch": 2.68, + "learning_rate": 9.26696542893726e-05, + "loss": 0.8256, + "step": 3355 + }, + { + "epoch": 2.68, + "learning_rate": 9.263764404609475e-05, + "loss": 0.7921, + "step": 3356 + }, + { + "epoch": 2.69, + "learning_rate": 9.260563380281691e-05, + "loss": 0.6966, + "step": 3357 + }, + { + "epoch": 2.69, + "learning_rate": 9.257362355953906e-05, + "loss": 0.7518, + "step": 3358 + }, + { + "epoch": 2.69, + "learning_rate": 9.254161331626122e-05, + "loss": 0.8085, + "step": 3359 + }, + { + "epoch": 2.69, + "learning_rate": 9.250960307298336e-05, + "loss": 0.8015, + "step": 3360 + }, + { + "epoch": 2.69, + "learning_rate": 9.247759282970551e-05, + "loss": 0.7543, + "step": 3361 + }, + { + "epoch": 2.69, + "learning_rate": 9.244558258642765e-05, + "loss": 0.6892, + "step": 3362 + }, + { + "epoch": 2.69, + "learning_rate": 9.241357234314981e-05, + "loss": 0.7246, + "step": 3363 + }, + { + "epoch": 2.69, + "learning_rate": 9.238156209987196e-05, + "loss": 0.8146, + "step": 3364 + }, + { + "epoch": 2.69, + "learning_rate": 9.234955185659412e-05, + "loss": 0.6579, + "step": 3365 + }, + { + "epoch": 2.69, + "learning_rate": 9.231754161331627e-05, + "loss": 0.7171, + "step": 3366 + }, + { + "epoch": 2.69, + "learning_rate": 9.228553137003841e-05, + "loss": 0.7129, + "step": 3367 + }, + { + "epoch": 2.69, + "learning_rate": 9.225352112676057e-05, + "loss": 0.8106, + "step": 3368 + }, + { + "epoch": 2.7, + "learning_rate": 9.222151088348272e-05, + "loss": 0.7325, + "step": 3369 + }, + { + "epoch": 2.7, + "learning_rate": 9.218950064020488e-05, + "loss": 0.7534, + "step": 3370 + }, + { + "epoch": 2.7, + "learning_rate": 9.215749039692702e-05, + "loss": 0.7747, + "step": 3371 + }, + { + "epoch": 2.7, + "learning_rate": 9.212548015364917e-05, + "loss": 0.7761, + "step": 3372 + }, + { + "epoch": 2.7, + "learning_rate": 9.209346991037132e-05, + "loss": 0.6322, + "step": 3373 + }, + { + "epoch": 2.7, + "learning_rate": 9.206145966709348e-05, + "loss": 0.7122, + "step": 3374 + }, + { + "epoch": 2.7, + "learning_rate": 9.202944942381562e-05, + "loss": 0.8123, + "step": 3375 + }, + { + "epoch": 2.7, + "learning_rate": 9.199743918053778e-05, + "loss": 0.6652, + "step": 3376 + }, + { + "epoch": 2.7, + "learning_rate": 9.196542893725993e-05, + "loss": 0.8518, + "step": 3377 + }, + { + "epoch": 2.7, + "learning_rate": 9.193341869398208e-05, + "loss": 0.7832, + "step": 3378 + }, + { + "epoch": 2.7, + "learning_rate": 9.190140845070423e-05, + "loss": 0.649, + "step": 3379 + }, + { + "epoch": 2.7, + "learning_rate": 9.186939820742638e-05, + "loss": 0.7824, + "step": 3380 + }, + { + "epoch": 2.7, + "learning_rate": 9.183738796414854e-05, + "loss": 0.7179, + "step": 3381 + }, + { + "epoch": 2.71, + "learning_rate": 9.180537772087067e-05, + "loss": 0.7984, + "step": 3382 + }, + { + "epoch": 2.71, + "learning_rate": 9.177336747759283e-05, + "loss": 0.8131, + "step": 3383 + }, + { + "epoch": 2.71, + "learning_rate": 9.174135723431498e-05, + "loss": 0.7362, + "step": 3384 + }, + { + "epoch": 2.71, + "learning_rate": 9.170934699103714e-05, + "loss": 0.7143, + "step": 3385 + }, + { + "epoch": 2.71, + "learning_rate": 9.167733674775929e-05, + "loss": 0.6642, + "step": 3386 + }, + { + "epoch": 2.71, + "learning_rate": 9.164532650448145e-05, + "loss": 0.7525, + "step": 3387 + }, + { + "epoch": 2.71, + "learning_rate": 9.161331626120359e-05, + "loss": 0.7647, + "step": 3388 + }, + { + "epoch": 2.71, + "learning_rate": 9.158130601792574e-05, + "loss": 0.7053, + "step": 3389 + }, + { + "epoch": 2.71, + "learning_rate": 9.15492957746479e-05, + "loss": 0.6969, + "step": 3390 + }, + { + "epoch": 2.71, + "learning_rate": 9.151728553137004e-05, + "loss": 0.6927, + "step": 3391 + }, + { + "epoch": 2.71, + "learning_rate": 9.14852752880922e-05, + "loss": 0.5547, + "step": 3392 + }, + { + "epoch": 2.71, + "learning_rate": 9.145326504481434e-05, + "loss": 0.7429, + "step": 3393 + }, + { + "epoch": 2.72, + "learning_rate": 9.14212548015365e-05, + "loss": 0.7565, + "step": 3394 + }, + { + "epoch": 2.72, + "learning_rate": 9.138924455825864e-05, + "loss": 0.7839, + "step": 3395 + }, + { + "epoch": 2.72, + "learning_rate": 9.13572343149808e-05, + "loss": 0.7561, + "step": 3396 + }, + { + "epoch": 2.72, + "learning_rate": 9.132522407170295e-05, + "loss": 0.7354, + "step": 3397 + }, + { + "epoch": 2.72, + "learning_rate": 9.129321382842511e-05, + "loss": 0.8195, + "step": 3398 + }, + { + "epoch": 2.72, + "learning_rate": 9.126120358514725e-05, + "loss": 0.7426, + "step": 3399 + }, + { + "epoch": 2.72, + "learning_rate": 9.12291933418694e-05, + "loss": 0.789, + "step": 3400 + }, + { + "epoch": 2.72, + "learning_rate": 9.119718309859156e-05, + "loss": 0.7711, + "step": 3401 + }, + { + "epoch": 2.72, + "learning_rate": 9.11651728553137e-05, + "loss": 0.7351, + "step": 3402 + }, + { + "epoch": 2.72, + "learning_rate": 9.113316261203587e-05, + "loss": 0.788, + "step": 3403 + }, + { + "epoch": 2.72, + "learning_rate": 9.1101152368758e-05, + "loss": 0.6213, + "step": 3404 + }, + { + "epoch": 2.72, + "learning_rate": 9.106914212548016e-05, + "loss": 0.8, + "step": 3405 + }, + { + "epoch": 2.72, + "learning_rate": 9.10371318822023e-05, + "loss": 0.8201, + "step": 3406 + }, + { + "epoch": 2.73, + "learning_rate": 9.100512163892446e-05, + "loss": 0.7709, + "step": 3407 + }, + { + "epoch": 2.73, + "learning_rate": 9.097311139564661e-05, + "loss": 0.6818, + "step": 3408 + }, + { + "epoch": 2.73, + "learning_rate": 9.094110115236876e-05, + "loss": 0.7694, + "step": 3409 + }, + { + "epoch": 2.73, + "learning_rate": 9.090909090909092e-05, + "loss": 0.7752, + "step": 3410 + }, + { + "epoch": 2.73, + "learning_rate": 9.087708066581306e-05, + "loss": 0.8655, + "step": 3411 + }, + { + "epoch": 2.73, + "learning_rate": 9.084507042253522e-05, + "loss": 0.7461, + "step": 3412 + }, + { + "epoch": 2.73, + "learning_rate": 9.081306017925737e-05, + "loss": 0.7515, + "step": 3413 + }, + { + "epoch": 2.73, + "learning_rate": 9.078104993597953e-05, + "loss": 0.8181, + "step": 3414 + }, + { + "epoch": 2.73, + "learning_rate": 9.074903969270166e-05, + "loss": 0.6523, + "step": 3415 + }, + { + "epoch": 2.73, + "learning_rate": 9.071702944942382e-05, + "loss": 0.6652, + "step": 3416 + }, + { + "epoch": 2.73, + "learning_rate": 9.068501920614597e-05, + "loss": 0.643, + "step": 3417 + }, + { + "epoch": 2.73, + "learning_rate": 9.065300896286813e-05, + "loss": 0.7795, + "step": 3418 + }, + { + "epoch": 2.74, + "learning_rate": 9.062099871959027e-05, + "loss": 0.8565, + "step": 3419 + }, + { + "epoch": 2.74, + "learning_rate": 9.058898847631242e-05, + "loss": 0.8027, + "step": 3420 + }, + { + "epoch": 2.74, + "learning_rate": 9.055697823303457e-05, + "loss": 0.7643, + "step": 3421 + }, + { + "epoch": 2.74, + "learning_rate": 9.052496798975673e-05, + "loss": 0.678, + "step": 3422 + }, + { + "epoch": 2.74, + "learning_rate": 9.049295774647887e-05, + "loss": 0.6925, + "step": 3423 + }, + { + "epoch": 2.74, + "learning_rate": 9.046094750320103e-05, + "loss": 0.8498, + "step": 3424 + }, + { + "epoch": 2.74, + "learning_rate": 9.042893725992318e-05, + "loss": 0.7826, + "step": 3425 + }, + { + "epoch": 2.74, + "learning_rate": 9.039692701664532e-05, + "loss": 0.7964, + "step": 3426 + }, + { + "epoch": 2.74, + "learning_rate": 9.036491677336748e-05, + "loss": 0.7492, + "step": 3427 + }, + { + "epoch": 2.74, + "learning_rate": 9.033290653008963e-05, + "loss": 0.6843, + "step": 3428 + }, + { + "epoch": 2.74, + "learning_rate": 9.030089628681179e-05, + "loss": 0.8092, + "step": 3429 + }, + { + "epoch": 2.74, + "learning_rate": 9.026888604353394e-05, + "loss": 0.7252, + "step": 3430 + }, + { + "epoch": 2.74, + "learning_rate": 9.023687580025608e-05, + "loss": 0.8138, + "step": 3431 + }, + { + "epoch": 2.75, + "learning_rate": 9.020486555697823e-05, + "loss": 0.8043, + "step": 3432 + }, + { + "epoch": 2.75, + "learning_rate": 9.017285531370039e-05, + "loss": 0.8653, + "step": 3433 + }, + { + "epoch": 2.75, + "learning_rate": 9.014084507042254e-05, + "loss": 0.6592, + "step": 3434 + }, + { + "epoch": 2.75, + "learning_rate": 9.01088348271447e-05, + "loss": 0.7373, + "step": 3435 + }, + { + "epoch": 2.75, + "learning_rate": 9.007682458386684e-05, + "loss": 0.7495, + "step": 3436 + }, + { + "epoch": 2.75, + "learning_rate": 9.004481434058899e-05, + "loss": 0.7694, + "step": 3437 + }, + { + "epoch": 2.75, + "learning_rate": 9.001280409731115e-05, + "loss": 0.627, + "step": 3438 + }, + { + "epoch": 2.75, + "learning_rate": 8.99807938540333e-05, + "loss": 0.8217, + "step": 3439 + }, + { + "epoch": 2.75, + "learning_rate": 8.994878361075545e-05, + "loss": 0.849, + "step": 3440 + }, + { + "epoch": 2.75, + "learning_rate": 8.99167733674776e-05, + "loss": 0.7179, + "step": 3441 + }, + { + "epoch": 2.75, + "learning_rate": 8.988476312419975e-05, + "loss": 0.8216, + "step": 3442 + }, + { + "epoch": 2.75, + "learning_rate": 8.985275288092189e-05, + "loss": 0.6905, + "step": 3443 + }, + { + "epoch": 2.76, + "learning_rate": 8.982074263764405e-05, + "loss": 0.7441, + "step": 3444 + }, + { + "epoch": 2.76, + "learning_rate": 8.97887323943662e-05, + "loss": 0.7386, + "step": 3445 + }, + { + "epoch": 2.76, + "learning_rate": 8.975672215108836e-05, + "loss": 0.7467, + "step": 3446 + }, + { + "epoch": 2.76, + "learning_rate": 8.97247119078105e-05, + "loss": 0.8003, + "step": 3447 + }, + { + "epoch": 2.76, + "learning_rate": 8.969270166453265e-05, + "loss": 0.6559, + "step": 3448 + }, + { + "epoch": 2.76, + "learning_rate": 8.966069142125481e-05, + "loss": 0.7651, + "step": 3449 + }, + { + "epoch": 2.76, + "learning_rate": 8.962868117797696e-05, + "loss": 0.7277, + "step": 3450 + }, + { + "epoch": 2.76, + "learning_rate": 8.959667093469912e-05, + "loss": 0.8058, + "step": 3451 + }, + { + "epoch": 2.76, + "learning_rate": 8.956466069142125e-05, + "loss": 0.7561, + "step": 3452 + }, + { + "epoch": 2.76, + "learning_rate": 8.953265044814341e-05, + "loss": 0.8854, + "step": 3453 + }, + { + "epoch": 2.76, + "learning_rate": 8.950064020486555e-05, + "loss": 0.6462, + "step": 3454 + }, + { + "epoch": 2.76, + "learning_rate": 8.946862996158771e-05, + "loss": 0.7167, + "step": 3455 + }, + { + "epoch": 2.76, + "learning_rate": 8.943661971830986e-05, + "loss": 0.8101, + "step": 3456 + }, + { + "epoch": 2.77, + "learning_rate": 8.940460947503202e-05, + "loss": 0.7403, + "step": 3457 + }, + { + "epoch": 2.77, + "learning_rate": 8.937259923175417e-05, + "loss": 0.7644, + "step": 3458 + }, + { + "epoch": 2.77, + "learning_rate": 8.934058898847631e-05, + "loss": 0.8233, + "step": 3459 + }, + { + "epoch": 2.77, + "learning_rate": 8.930857874519847e-05, + "loss": 0.826, + "step": 3460 + }, + { + "epoch": 2.77, + "learning_rate": 8.927656850192062e-05, + "loss": 0.7811, + "step": 3461 + }, + { + "epoch": 2.77, + "learning_rate": 8.924455825864278e-05, + "loss": 0.8728, + "step": 3462 + }, + { + "epoch": 2.77, + "learning_rate": 8.921254801536491e-05, + "loss": 0.794, + "step": 3463 + }, + { + "epoch": 2.77, + "learning_rate": 8.918053777208707e-05, + "loss": 0.753, + "step": 3464 + }, + { + "epoch": 2.77, + "learning_rate": 8.914852752880922e-05, + "loss": 0.9353, + "step": 3465 + }, + { + "epoch": 2.77, + "learning_rate": 8.911651728553138e-05, + "loss": 0.7191, + "step": 3466 + }, + { + "epoch": 2.77, + "learning_rate": 8.908450704225352e-05, + "loss": 0.8647, + "step": 3467 + }, + { + "epoch": 2.77, + "learning_rate": 8.905249679897568e-05, + "loss": 0.7085, + "step": 3468 + }, + { + "epoch": 2.78, + "learning_rate": 8.902048655569783e-05, + "loss": 0.7354, + "step": 3469 + }, + { + "epoch": 2.78, + "learning_rate": 8.898847631241998e-05, + "loss": 0.7524, + "step": 3470 + }, + { + "epoch": 2.78, + "learning_rate": 8.895646606914214e-05, + "loss": 0.7218, + "step": 3471 + }, + { + "epoch": 2.78, + "learning_rate": 8.892445582586428e-05, + "loss": 0.7224, + "step": 3472 + }, + { + "epoch": 2.78, + "learning_rate": 8.889244558258644e-05, + "loss": 0.7233, + "step": 3473 + }, + { + "epoch": 2.78, + "learning_rate": 8.886043533930857e-05, + "loss": 0.6786, + "step": 3474 + }, + { + "epoch": 2.78, + "learning_rate": 8.882842509603073e-05, + "loss": 0.7517, + "step": 3475 + }, + { + "epoch": 2.78, + "learning_rate": 8.879641485275288e-05, + "loss": 0.6633, + "step": 3476 + }, + { + "epoch": 2.78, + "learning_rate": 8.876440460947504e-05, + "loss": 0.7414, + "step": 3477 + }, + { + "epoch": 2.78, + "learning_rate": 8.873239436619719e-05, + "loss": 0.899, + "step": 3478 + }, + { + "epoch": 2.78, + "learning_rate": 8.870038412291933e-05, + "loss": 0.62, + "step": 3479 + }, + { + "epoch": 2.78, + "learning_rate": 8.866837387964149e-05, + "loss": 0.6395, + "step": 3480 + }, + { + "epoch": 2.78, + "learning_rate": 8.863636363636364e-05, + "loss": 0.7531, + "step": 3481 + }, + { + "epoch": 2.79, + "learning_rate": 8.86043533930858e-05, + "loss": 0.7821, + "step": 3482 + }, + { + "epoch": 2.79, + "learning_rate": 8.857234314980794e-05, + "loss": 0.698, + "step": 3483 + }, + { + "epoch": 2.79, + "learning_rate": 8.85403329065301e-05, + "loss": 0.7784, + "step": 3484 + }, + { + "epoch": 2.79, + "learning_rate": 8.850832266325224e-05, + "loss": 0.6644, + "step": 3485 + }, + { + "epoch": 2.79, + "learning_rate": 8.84763124199744e-05, + "loss": 0.7533, + "step": 3486 + }, + { + "epoch": 2.79, + "learning_rate": 8.844430217669654e-05, + "loss": 0.6341, + "step": 3487 + }, + { + "epoch": 2.79, + "learning_rate": 8.84122919334187e-05, + "loss": 0.7153, + "step": 3488 + }, + { + "epoch": 2.79, + "learning_rate": 8.838028169014085e-05, + "loss": 0.7649, + "step": 3489 + }, + { + "epoch": 2.79, + "learning_rate": 8.8348271446863e-05, + "loss": 0.7077, + "step": 3490 + }, + { + "epoch": 2.79, + "learning_rate": 8.831626120358515e-05, + "loss": 0.765, + "step": 3491 + }, + { + "epoch": 2.79, + "learning_rate": 8.82842509603073e-05, + "loss": 0.7951, + "step": 3492 + }, + { + "epoch": 2.79, + "learning_rate": 8.825224071702946e-05, + "loss": 0.6694, + "step": 3493 + }, + { + "epoch": 2.8, + "learning_rate": 8.822023047375161e-05, + "loss": 0.7926, + "step": 3494 + }, + { + "epoch": 2.8, + "learning_rate": 8.818822023047375e-05, + "loss": 0.7126, + "step": 3495 + }, + { + "epoch": 2.8, + "learning_rate": 8.81562099871959e-05, + "loss": 0.8151, + "step": 3496 + }, + { + "epoch": 2.8, + "learning_rate": 8.812419974391806e-05, + "loss": 0.7275, + "step": 3497 + }, + { + "epoch": 2.8, + "learning_rate": 8.80921895006402e-05, + "loss": 0.7165, + "step": 3498 + }, + { + "epoch": 2.8, + "learning_rate": 8.806017925736237e-05, + "loss": 0.7686, + "step": 3499 + }, + { + "epoch": 2.8, + "learning_rate": 8.802816901408451e-05, + "loss": 0.6949, + "step": 3500 + }, + { + "epoch": 2.8, + "learning_rate": 8.799615877080666e-05, + "loss": 0.765, + "step": 3501 + }, + { + "epoch": 2.8, + "learning_rate": 8.796414852752882e-05, + "loss": 0.6952, + "step": 3502 + }, + { + "epoch": 2.8, + "learning_rate": 8.793213828425096e-05, + "loss": 0.8083, + "step": 3503 + }, + { + "epoch": 2.8, + "learning_rate": 8.790012804097312e-05, + "loss": 0.8343, + "step": 3504 + }, + { + "epoch": 2.8, + "learning_rate": 8.786811779769527e-05, + "loss": 0.7339, + "step": 3505 + }, + { + "epoch": 2.8, + "learning_rate": 8.783610755441742e-05, + "loss": 0.6442, + "step": 3506 + }, + { + "epoch": 2.81, + "learning_rate": 8.780409731113956e-05, + "loss": 0.8082, + "step": 3507 + }, + { + "epoch": 2.81, + "learning_rate": 8.777208706786172e-05, + "loss": 0.7931, + "step": 3508 + }, + { + "epoch": 2.81, + "learning_rate": 8.774007682458387e-05, + "loss": 0.7291, + "step": 3509 + }, + { + "epoch": 2.81, + "learning_rate": 8.770806658130603e-05, + "loss": 0.6949, + "step": 3510 + }, + { + "epoch": 2.81, + "learning_rate": 8.767605633802817e-05, + "loss": 0.8709, + "step": 3511 + }, + { + "epoch": 2.81, + "learning_rate": 8.764404609475032e-05, + "loss": 0.739, + "step": 3512 + }, + { + "epoch": 2.81, + "learning_rate": 8.761203585147247e-05, + "loss": 0.7337, + "step": 3513 + }, + { + "epoch": 2.81, + "learning_rate": 8.758002560819463e-05, + "loss": 0.7553, + "step": 3514 + }, + { + "epoch": 2.81, + "learning_rate": 8.754801536491679e-05, + "loss": 0.7309, + "step": 3515 + }, + { + "epoch": 2.81, + "learning_rate": 8.751600512163893e-05, + "loss": 0.7127, + "step": 3516 + }, + { + "epoch": 2.81, + "learning_rate": 8.748399487836108e-05, + "loss": 0.8748, + "step": 3517 + }, + { + "epoch": 2.81, + "learning_rate": 8.745198463508323e-05, + "loss": 0.7254, + "step": 3518 + }, + { + "epoch": 2.82, + "learning_rate": 8.741997439180539e-05, + "loss": 0.6833, + "step": 3519 + }, + { + "epoch": 2.82, + "learning_rate": 8.738796414852753e-05, + "loss": 0.6106, + "step": 3520 + }, + { + "epoch": 2.82, + "learning_rate": 8.735595390524969e-05, + "loss": 0.7595, + "step": 3521 + }, + { + "epoch": 2.82, + "learning_rate": 8.732394366197182e-05, + "loss": 0.6917, + "step": 3522 + }, + { + "epoch": 2.82, + "learning_rate": 8.729193341869398e-05, + "loss": 0.7469, + "step": 3523 + }, + { + "epoch": 2.82, + "learning_rate": 8.725992317541613e-05, + "loss": 0.8689, + "step": 3524 + }, + { + "epoch": 2.82, + "learning_rate": 8.722791293213829e-05, + "loss": 0.721, + "step": 3525 + }, + { + "epoch": 2.82, + "learning_rate": 8.719590268886044e-05, + "loss": 0.736, + "step": 3526 + }, + { + "epoch": 2.82, + "learning_rate": 8.71638924455826e-05, + "loss": 0.8983, + "step": 3527 + }, + { + "epoch": 2.82, + "learning_rate": 8.713188220230474e-05, + "loss": 0.5996, + "step": 3528 + }, + { + "epoch": 2.82, + "learning_rate": 8.709987195902689e-05, + "loss": 0.7452, + "step": 3529 + }, + { + "epoch": 2.82, + "learning_rate": 8.706786171574905e-05, + "loss": 0.704, + "step": 3530 + }, + { + "epoch": 2.82, + "learning_rate": 8.70358514724712e-05, + "loss": 0.7187, + "step": 3531 + }, + { + "epoch": 2.83, + "learning_rate": 8.700384122919335e-05, + "loss": 0.7269, + "step": 3532 + }, + { + "epoch": 2.83, + "learning_rate": 8.697183098591549e-05, + "loss": 0.7099, + "step": 3533 + }, + { + "epoch": 2.83, + "learning_rate": 8.693982074263765e-05, + "loss": 0.736, + "step": 3534 + }, + { + "epoch": 2.83, + "learning_rate": 8.690781049935979e-05, + "loss": 0.6892, + "step": 3535 + }, + { + "epoch": 2.83, + "learning_rate": 8.687580025608195e-05, + "loss": 0.6823, + "step": 3536 + }, + { + "epoch": 2.83, + "learning_rate": 8.68437900128041e-05, + "loss": 0.672, + "step": 3537 + }, + { + "epoch": 2.83, + "learning_rate": 8.681177976952626e-05, + "loss": 0.7149, + "step": 3538 + }, + { + "epoch": 2.83, + "learning_rate": 8.67797695262484e-05, + "loss": 0.7193, + "step": 3539 + }, + { + "epoch": 2.83, + "learning_rate": 8.674775928297055e-05, + "loss": 0.7369, + "step": 3540 + }, + { + "epoch": 2.83, + "learning_rate": 8.671574903969271e-05, + "loss": 0.7006, + "step": 3541 + }, + { + "epoch": 2.83, + "learning_rate": 8.668373879641486e-05, + "loss": 0.8033, + "step": 3542 + }, + { + "epoch": 2.83, + "learning_rate": 8.665172855313702e-05, + "loss": 0.7879, + "step": 3543 + }, + { + "epoch": 2.84, + "learning_rate": 8.661971830985915e-05, + "loss": 0.7547, + "step": 3544 + }, + { + "epoch": 2.84, + "learning_rate": 8.658770806658131e-05, + "loss": 0.7087, + "step": 3545 + }, + { + "epoch": 2.84, + "learning_rate": 8.655569782330346e-05, + "loss": 0.6548, + "step": 3546 + }, + { + "epoch": 2.84, + "learning_rate": 8.652368758002562e-05, + "loss": 0.761, + "step": 3547 + }, + { + "epoch": 2.84, + "learning_rate": 8.649167733674776e-05, + "loss": 0.7218, + "step": 3548 + }, + { + "epoch": 2.84, + "learning_rate": 8.645966709346991e-05, + "loss": 0.7695, + "step": 3549 + }, + { + "epoch": 2.84, + "learning_rate": 8.642765685019207e-05, + "loss": 0.7379, + "step": 3550 + }, + { + "epoch": 2.84, + "learning_rate": 8.639564660691421e-05, + "loss": 0.7341, + "step": 3551 + }, + { + "epoch": 2.84, + "learning_rate": 8.636363636363637e-05, + "loss": 0.7479, + "step": 3552 + }, + { + "epoch": 2.84, + "learning_rate": 8.633162612035852e-05, + "loss": 0.7947, + "step": 3553 + }, + { + "epoch": 2.84, + "learning_rate": 8.629961587708068e-05, + "loss": 0.7179, + "step": 3554 + }, + { + "epoch": 2.84, + "learning_rate": 8.626760563380281e-05, + "loss": 0.7165, + "step": 3555 + }, + { + "epoch": 2.84, + "learning_rate": 8.623559539052497e-05, + "loss": 0.5968, + "step": 3556 + }, + { + "epoch": 2.85, + "learning_rate": 8.620358514724712e-05, + "loss": 0.8374, + "step": 3557 + }, + { + "epoch": 2.85, + "learning_rate": 8.617157490396928e-05, + "loss": 0.724, + "step": 3558 + }, + { + "epoch": 2.85, + "learning_rate": 8.613956466069142e-05, + "loss": 0.7045, + "step": 3559 + }, + { + "epoch": 2.85, + "learning_rate": 8.610755441741357e-05, + "loss": 0.8342, + "step": 3560 + }, + { + "epoch": 2.85, + "learning_rate": 8.607554417413573e-05, + "loss": 0.796, + "step": 3561 + }, + { + "epoch": 2.85, + "learning_rate": 8.604353393085788e-05, + "loss": 0.6322, + "step": 3562 + }, + { + "epoch": 2.85, + "learning_rate": 8.601152368758004e-05, + "loss": 0.744, + "step": 3563 + }, + { + "epoch": 2.85, + "learning_rate": 8.597951344430218e-05, + "loss": 0.7394, + "step": 3564 + }, + { + "epoch": 2.85, + "learning_rate": 8.594750320102433e-05, + "loss": 0.7487, + "step": 3565 + }, + { + "epoch": 2.85, + "learning_rate": 8.591549295774647e-05, + "loss": 0.8474, + "step": 3566 + }, + { + "epoch": 2.85, + "learning_rate": 8.588348271446863e-05, + "loss": 0.6097, + "step": 3567 + }, + { + "epoch": 2.85, + "learning_rate": 8.585147247119078e-05, + "loss": 0.7167, + "step": 3568 + }, + { + "epoch": 2.86, + "learning_rate": 8.581946222791294e-05, + "loss": 0.7208, + "step": 3569 + }, + { + "epoch": 2.86, + "learning_rate": 8.578745198463509e-05, + "loss": 0.7994, + "step": 3570 + }, + { + "epoch": 2.86, + "learning_rate": 8.575544174135723e-05, + "loss": 0.7622, + "step": 3571 + }, + { + "epoch": 2.86, + "learning_rate": 8.572343149807939e-05, + "loss": 0.8342, + "step": 3572 + }, + { + "epoch": 2.86, + "learning_rate": 8.569142125480154e-05, + "loss": 0.6941, + "step": 3573 + }, + { + "epoch": 2.86, + "learning_rate": 8.56594110115237e-05, + "loss": 0.6905, + "step": 3574 + }, + { + "epoch": 2.86, + "learning_rate": 8.562740076824585e-05, + "loss": 0.736, + "step": 3575 + }, + { + "epoch": 2.86, + "learning_rate": 8.559539052496799e-05, + "loss": 0.6689, + "step": 3576 + }, + { + "epoch": 2.86, + "learning_rate": 8.556338028169014e-05, + "loss": 0.6119, + "step": 3577 + }, + { + "epoch": 2.86, + "learning_rate": 8.55313700384123e-05, + "loss": 0.6254, + "step": 3578 + }, + { + "epoch": 2.86, + "learning_rate": 8.549935979513444e-05, + "loss": 0.7582, + "step": 3579 + }, + { + "epoch": 2.86, + "learning_rate": 8.54673495518566e-05, + "loss": 0.6999, + "step": 3580 + }, + { + "epoch": 2.86, + "learning_rate": 8.543533930857875e-05, + "loss": 0.7559, + "step": 3581 + }, + { + "epoch": 2.87, + "learning_rate": 8.54033290653009e-05, + "loss": 0.8938, + "step": 3582 + }, + { + "epoch": 2.87, + "learning_rate": 8.537131882202306e-05, + "loss": 0.7718, + "step": 3583 + }, + { + "epoch": 2.87, + "learning_rate": 8.53393085787452e-05, + "loss": 0.7892, + "step": 3584 + }, + { + "epoch": 2.87, + "learning_rate": 8.530729833546736e-05, + "loss": 0.736, + "step": 3585 + }, + { + "epoch": 2.87, + "learning_rate": 8.527528809218951e-05, + "loss": 0.8282, + "step": 3586 + }, + { + "epoch": 2.87, + "learning_rate": 8.524327784891165e-05, + "loss": 0.8009, + "step": 3587 + }, + { + "epoch": 2.87, + "learning_rate": 8.52112676056338e-05, + "loss": 0.7819, + "step": 3588 + }, + { + "epoch": 2.87, + "learning_rate": 8.517925736235596e-05, + "loss": 0.6807, + "step": 3589 + }, + { + "epoch": 2.87, + "learning_rate": 8.51472471190781e-05, + "loss": 0.684, + "step": 3590 + }, + { + "epoch": 2.87, + "learning_rate": 8.511523687580027e-05, + "loss": 0.6703, + "step": 3591 + }, + { + "epoch": 2.87, + "learning_rate": 8.508322663252241e-05, + "loss": 0.7143, + "step": 3592 + }, + { + "epoch": 2.87, + "learning_rate": 8.505121638924456e-05, + "loss": 0.8074, + "step": 3593 + }, + { + "epoch": 2.88, + "learning_rate": 8.501920614596672e-05, + "loss": 0.8541, + "step": 3594 + }, + { + "epoch": 2.88, + "learning_rate": 8.498719590268886e-05, + "loss": 0.6999, + "step": 3595 + }, + { + "epoch": 2.88, + "learning_rate": 8.495518565941102e-05, + "loss": 0.8229, + "step": 3596 + }, + { + "epoch": 2.88, + "learning_rate": 8.492317541613317e-05, + "loss": 0.8044, + "step": 3597 + }, + { + "epoch": 2.88, + "learning_rate": 8.489116517285532e-05, + "loss": 0.8352, + "step": 3598 + }, + { + "epoch": 2.88, + "learning_rate": 8.485915492957746e-05, + "loss": 0.9506, + "step": 3599 + }, + { + "epoch": 2.88, + "learning_rate": 8.482714468629962e-05, + "loss": 0.7067, + "step": 3600 + }, + { + "epoch": 2.88, + "learning_rate": 8.479513444302177e-05, + "loss": 0.7517, + "step": 3601 + }, + { + "epoch": 2.88, + "learning_rate": 8.476312419974393e-05, + "loss": 0.7876, + "step": 3602 + }, + { + "epoch": 2.88, + "learning_rate": 8.473111395646606e-05, + "loss": 0.7107, + "step": 3603 + }, + { + "epoch": 2.88, + "learning_rate": 8.469910371318822e-05, + "loss": 0.751, + "step": 3604 + }, + { + "epoch": 2.88, + "learning_rate": 8.466709346991038e-05, + "loss": 0.7623, + "step": 3605 + }, + { + "epoch": 2.88, + "learning_rate": 8.463508322663253e-05, + "loss": 0.7535, + "step": 3606 + }, + { + "epoch": 2.89, + "learning_rate": 8.460307298335469e-05, + "loss": 0.8407, + "step": 3607 + }, + { + "epoch": 2.89, + "learning_rate": 8.457106274007683e-05, + "loss": 0.7418, + "step": 3608 + }, + { + "epoch": 2.89, + "learning_rate": 8.453905249679898e-05, + "loss": 0.8314, + "step": 3609 + }, + { + "epoch": 2.89, + "learning_rate": 8.450704225352113e-05, + "loss": 0.7362, + "step": 3610 + }, + { + "epoch": 2.89, + "learning_rate": 8.447503201024329e-05, + "loss": 0.7249, + "step": 3611 + }, + { + "epoch": 2.89, + "learning_rate": 8.444302176696543e-05, + "loss": 0.7365, + "step": 3612 + }, + { + "epoch": 2.89, + "learning_rate": 8.441101152368759e-05, + "loss": 0.7132, + "step": 3613 + }, + { + "epoch": 2.89, + "learning_rate": 8.437900128040972e-05, + "loss": 0.7178, + "step": 3614 + }, + { + "epoch": 2.89, + "learning_rate": 8.434699103713188e-05, + "loss": 0.7676, + "step": 3615 + }, + { + "epoch": 2.89, + "learning_rate": 8.431498079385403e-05, + "loss": 0.7015, + "step": 3616 + }, + { + "epoch": 2.89, + "learning_rate": 8.428297055057619e-05, + "loss": 0.8731, + "step": 3617 + }, + { + "epoch": 2.89, + "learning_rate": 8.425096030729835e-05, + "loss": 0.8121, + "step": 3618 + }, + { + "epoch": 2.9, + "learning_rate": 8.421895006402048e-05, + "loss": 0.7956, + "step": 3619 + }, + { + "epoch": 2.9, + "learning_rate": 8.418693982074264e-05, + "loss": 0.6545, + "step": 3620 + }, + { + "epoch": 2.9, + "learning_rate": 8.415492957746479e-05, + "loss": 0.7237, + "step": 3621 + }, + { + "epoch": 2.9, + "learning_rate": 8.412291933418695e-05, + "loss": 0.7644, + "step": 3622 + }, + { + "epoch": 2.9, + "learning_rate": 8.40909090909091e-05, + "loss": 0.7515, + "step": 3623 + }, + { + "epoch": 2.9, + "learning_rate": 8.405889884763125e-05, + "loss": 0.6998, + "step": 3624 + }, + { + "epoch": 2.9, + "learning_rate": 8.402688860435339e-05, + "loss": 0.7206, + "step": 3625 + }, + { + "epoch": 2.9, + "learning_rate": 8.399487836107555e-05, + "loss": 0.864, + "step": 3626 + }, + { + "epoch": 2.9, + "learning_rate": 8.396286811779769e-05, + "loss": 0.6613, + "step": 3627 + }, + { + "epoch": 2.9, + "learning_rate": 8.393085787451985e-05, + "loss": 0.8676, + "step": 3628 + }, + { + "epoch": 2.9, + "learning_rate": 8.3898847631242e-05, + "loss": 0.7627, + "step": 3629 + }, + { + "epoch": 2.9, + "learning_rate": 8.386683738796415e-05, + "loss": 0.7353, + "step": 3630 + }, + { + "epoch": 2.9, + "learning_rate": 8.38348271446863e-05, + "loss": 0.6283, + "step": 3631 + }, + { + "epoch": 2.91, + "learning_rate": 8.380281690140845e-05, + "loss": 0.6804, + "step": 3632 + }, + { + "epoch": 2.91, + "learning_rate": 8.377080665813061e-05, + "loss": 0.7415, + "step": 3633 + }, + { + "epoch": 2.91, + "learning_rate": 8.373879641485276e-05, + "loss": 0.6528, + "step": 3634 + }, + { + "epoch": 2.91, + "learning_rate": 8.37067861715749e-05, + "loss": 0.8479, + "step": 3635 + }, + { + "epoch": 2.91, + "learning_rate": 8.367477592829705e-05, + "loss": 0.6502, + "step": 3636 + }, + { + "epoch": 2.91, + "learning_rate": 8.364276568501921e-05, + "loss": 0.8436, + "step": 3637 + }, + { + "epoch": 2.91, + "learning_rate": 8.361075544174136e-05, + "loss": 0.7867, + "step": 3638 + }, + { + "epoch": 2.91, + "learning_rate": 8.357874519846352e-05, + "loss": 0.6749, + "step": 3639 + }, + { + "epoch": 2.91, + "learning_rate": 8.354673495518566e-05, + "loss": 0.674, + "step": 3640 + }, + { + "epoch": 2.91, + "learning_rate": 8.351472471190781e-05, + "loss": 0.7523, + "step": 3641 + }, + { + "epoch": 2.91, + "learning_rate": 8.348271446862997e-05, + "loss": 0.774, + "step": 3642 + }, + { + "epoch": 2.91, + "learning_rate": 8.345070422535211e-05, + "loss": 0.7996, + "step": 3643 + }, + { + "epoch": 2.92, + "learning_rate": 8.341869398207427e-05, + "loss": 0.8567, + "step": 3644 + }, + { + "epoch": 2.92, + "learning_rate": 8.338668373879642e-05, + "loss": 0.7258, + "step": 3645 + }, + { + "epoch": 2.92, + "learning_rate": 8.335467349551857e-05, + "loss": 0.6709, + "step": 3646 + }, + { + "epoch": 2.92, + "learning_rate": 8.332266325224071e-05, + "loss": 0.7065, + "step": 3647 + }, + { + "epoch": 2.92, + "learning_rate": 8.329065300896287e-05, + "loss": 0.7192, + "step": 3648 + }, + { + "epoch": 2.92, + "learning_rate": 8.325864276568502e-05, + "loss": 0.6909, + "step": 3649 + }, + { + "epoch": 2.92, + "learning_rate": 8.322663252240718e-05, + "loss": 0.777, + "step": 3650 + }, + { + "epoch": 2.92, + "learning_rate": 8.319462227912932e-05, + "loss": 0.7595, + "step": 3651 + }, + { + "epoch": 2.92, + "learning_rate": 8.316261203585147e-05, + "loss": 0.6704, + "step": 3652 + }, + { + "epoch": 2.92, + "learning_rate": 8.313060179257363e-05, + "loss": 0.8164, + "step": 3653 + }, + { + "epoch": 2.92, + "learning_rate": 8.309859154929578e-05, + "loss": 0.7913, + "step": 3654 + }, + { + "epoch": 2.92, + "learning_rate": 8.306658130601794e-05, + "loss": 0.7166, + "step": 3655 + }, + { + "epoch": 2.92, + "learning_rate": 8.303457106274008e-05, + "loss": 0.8283, + "step": 3656 + }, + { + "epoch": 2.93, + "learning_rate": 8.300256081946223e-05, + "loss": 0.8721, + "step": 3657 + }, + { + "epoch": 2.93, + "learning_rate": 8.297055057618438e-05, + "loss": 0.7705, + "step": 3658 + }, + { + "epoch": 2.93, + "learning_rate": 8.293854033290654e-05, + "loss": 0.6945, + "step": 3659 + }, + { + "epoch": 2.93, + "learning_rate": 8.290653008962868e-05, + "loss": 0.6937, + "step": 3660 + }, + { + "epoch": 2.93, + "learning_rate": 8.287451984635084e-05, + "loss": 0.706, + "step": 3661 + }, + { + "epoch": 2.93, + "learning_rate": 8.284250960307299e-05, + "loss": 0.8217, + "step": 3662 + }, + { + "epoch": 2.93, + "learning_rate": 8.281049935979513e-05, + "loss": 0.7291, + "step": 3663 + }, + { + "epoch": 2.93, + "learning_rate": 8.27784891165173e-05, + "loss": 0.8045, + "step": 3664 + }, + { + "epoch": 2.93, + "learning_rate": 8.274647887323944e-05, + "loss": 0.7404, + "step": 3665 + }, + { + "epoch": 2.93, + "learning_rate": 8.27144686299616e-05, + "loss": 0.8582, + "step": 3666 + }, + { + "epoch": 2.93, + "learning_rate": 8.268245838668375e-05, + "loss": 0.719, + "step": 3667 + }, + { + "epoch": 2.93, + "learning_rate": 8.265044814340589e-05, + "loss": 0.8395, + "step": 3668 + }, + { + "epoch": 2.94, + "learning_rate": 8.261843790012804e-05, + "loss": 0.6515, + "step": 3669 + }, + { + "epoch": 2.94, + "learning_rate": 8.25864276568502e-05, + "loss": 0.7112, + "step": 3670 + }, + { + "epoch": 2.94, + "learning_rate": 8.255441741357234e-05, + "loss": 0.8511, + "step": 3671 + }, + { + "epoch": 2.94, + "learning_rate": 8.25224071702945e-05, + "loss": 0.6876, + "step": 3672 + }, + { + "epoch": 2.94, + "learning_rate": 8.249039692701665e-05, + "loss": 0.847, + "step": 3673 + }, + { + "epoch": 2.94, + "learning_rate": 8.24583866837388e-05, + "loss": 0.8409, + "step": 3674 + }, + { + "epoch": 2.94, + "learning_rate": 8.242637644046096e-05, + "loss": 0.7164, + "step": 3675 + }, + { + "epoch": 2.94, + "learning_rate": 8.23943661971831e-05, + "loss": 0.8521, + "step": 3676 + }, + { + "epoch": 2.94, + "learning_rate": 8.236235595390526e-05, + "loss": 0.7586, + "step": 3677 + }, + { + "epoch": 2.94, + "learning_rate": 8.233034571062741e-05, + "loss": 0.8678, + "step": 3678 + }, + { + "epoch": 2.94, + "learning_rate": 8.229833546734955e-05, + "loss": 0.704, + "step": 3679 + }, + { + "epoch": 2.94, + "learning_rate": 8.22663252240717e-05, + "loss": 0.7828, + "step": 3680 + }, + { + "epoch": 2.94, + "learning_rate": 8.223431498079386e-05, + "loss": 0.7941, + "step": 3681 + }, + { + "epoch": 2.95, + "learning_rate": 8.220230473751601e-05, + "loss": 0.5931, + "step": 3682 + }, + { + "epoch": 2.95, + "learning_rate": 8.217029449423817e-05, + "loss": 0.8906, + "step": 3683 + }, + { + "epoch": 2.95, + "learning_rate": 8.213828425096031e-05, + "loss": 0.7975, + "step": 3684 + }, + { + "epoch": 2.95, + "learning_rate": 8.210627400768246e-05, + "loss": 0.7403, + "step": 3685 + }, + { + "epoch": 2.95, + "learning_rate": 8.207426376440462e-05, + "loss": 0.7005, + "step": 3686 + }, + { + "epoch": 2.95, + "learning_rate": 8.204225352112677e-05, + "loss": 0.9106, + "step": 3687 + }, + { + "epoch": 2.95, + "learning_rate": 8.201024327784892e-05, + "loss": 0.8138, + "step": 3688 + }, + { + "epoch": 2.95, + "learning_rate": 8.197823303457106e-05, + "loss": 0.9695, + "step": 3689 + }, + { + "epoch": 2.95, + "learning_rate": 8.194622279129322e-05, + "loss": 0.8233, + "step": 3690 + }, + { + "epoch": 2.95, + "learning_rate": 8.191421254801536e-05, + "loss": 0.8088, + "step": 3691 + }, + { + "epoch": 2.95, + "learning_rate": 8.188220230473752e-05, + "loss": 0.7851, + "step": 3692 + }, + { + "epoch": 2.95, + "learning_rate": 8.185019206145967e-05, + "loss": 0.7749, + "step": 3693 + }, + { + "epoch": 2.96, + "learning_rate": 8.181818181818183e-05, + "loss": 0.7636, + "step": 3694 + }, + { + "epoch": 2.96, + "learning_rate": 8.178617157490398e-05, + "loss": 0.7958, + "step": 3695 + }, + { + "epoch": 2.96, + "learning_rate": 8.175416133162612e-05, + "loss": 0.7392, + "step": 3696 + }, + { + "epoch": 2.96, + "learning_rate": 8.172215108834828e-05, + "loss": 0.7222, + "step": 3697 + }, + { + "epoch": 2.96, + "learning_rate": 8.169014084507043e-05, + "loss": 0.7105, + "step": 3698 + }, + { + "epoch": 2.96, + "learning_rate": 8.165813060179259e-05, + "loss": 0.7828, + "step": 3699 + }, + { + "epoch": 2.96, + "learning_rate": 8.162612035851472e-05, + "loss": 0.7534, + "step": 3700 + }, + { + "epoch": 2.96, + "learning_rate": 8.159411011523688e-05, + "loss": 0.8409, + "step": 3701 + }, + { + "epoch": 2.96, + "learning_rate": 8.156209987195903e-05, + "loss": 0.8948, + "step": 3702 + }, + { + "epoch": 2.96, + "learning_rate": 8.153008962868119e-05, + "loss": 0.7348, + "step": 3703 + }, + { + "epoch": 2.96, + "learning_rate": 8.149807938540333e-05, + "loss": 0.7404, + "step": 3704 + }, + { + "epoch": 2.96, + "learning_rate": 8.146606914212549e-05, + "loss": 0.6841, + "step": 3705 + }, + { + "epoch": 2.96, + "learning_rate": 8.143405889884762e-05, + "loss": 0.9227, + "step": 3706 + }, + { + "epoch": 2.97, + "learning_rate": 8.140204865556978e-05, + "loss": 0.7035, + "step": 3707 + }, + { + "epoch": 2.97, + "learning_rate": 8.137003841229194e-05, + "loss": 0.7337, + "step": 3708 + }, + { + "epoch": 2.97, + "learning_rate": 8.133802816901409e-05, + "loss": 0.7892, + "step": 3709 + }, + { + "epoch": 2.97, + "learning_rate": 8.130601792573625e-05, + "loss": 0.7154, + "step": 3710 + }, + { + "epoch": 2.97, + "learning_rate": 8.127400768245838e-05, + "loss": 0.7102, + "step": 3711 + }, + { + "epoch": 2.97, + "learning_rate": 8.124199743918054e-05, + "loss": 0.7606, + "step": 3712 + }, + { + "epoch": 2.97, + "learning_rate": 8.120998719590269e-05, + "loss": 0.668, + "step": 3713 + }, + { + "epoch": 2.97, + "learning_rate": 8.117797695262485e-05, + "loss": 0.7594, + "step": 3714 + }, + { + "epoch": 2.97, + "learning_rate": 8.1145966709347e-05, + "loss": 0.6388, + "step": 3715 + }, + { + "epoch": 2.97, + "learning_rate": 8.111395646606914e-05, + "loss": 0.8139, + "step": 3716 + }, + { + "epoch": 2.97, + "learning_rate": 8.108194622279129e-05, + "loss": 0.7878, + "step": 3717 + }, + { + "epoch": 2.97, + "learning_rate": 8.104993597951345e-05, + "loss": 0.7383, + "step": 3718 + }, + { + "epoch": 2.98, + "learning_rate": 8.10179257362356e-05, + "loss": 0.7418, + "step": 3719 + }, + { + "epoch": 2.98, + "learning_rate": 8.098591549295775e-05, + "loss": 0.8998, + "step": 3720 + }, + { + "epoch": 2.98, + "learning_rate": 8.095390524967991e-05, + "loss": 0.623, + "step": 3721 + }, + { + "epoch": 2.98, + "learning_rate": 8.092189500640205e-05, + "loss": 0.7278, + "step": 3722 + }, + { + "epoch": 2.98, + "learning_rate": 8.08898847631242e-05, + "loss": 0.672, + "step": 3723 + }, + { + "epoch": 2.98, + "learning_rate": 8.085787451984635e-05, + "loss": 0.6928, + "step": 3724 + }, + { + "epoch": 2.98, + "learning_rate": 8.082586427656851e-05, + "loss": 0.7628, + "step": 3725 + }, + { + "epoch": 2.98, + "learning_rate": 8.079385403329066e-05, + "loss": 0.8186, + "step": 3726 + }, + { + "epoch": 2.98, + "learning_rate": 8.07618437900128e-05, + "loss": 0.6842, + "step": 3727 + }, + { + "epoch": 2.98, + "learning_rate": 8.072983354673495e-05, + "loss": 0.7558, + "step": 3728 + }, + { + "epoch": 2.98, + "learning_rate": 8.069782330345711e-05, + "loss": 0.7225, + "step": 3729 + }, + { + "epoch": 2.98, + "learning_rate": 8.066581306017926e-05, + "loss": 0.8523, + "step": 3730 + }, + { + "epoch": 2.98, + "learning_rate": 8.063380281690142e-05, + "loss": 0.6979, + "step": 3731 + }, + { + "epoch": 2.99, + "learning_rate": 8.060179257362356e-05, + "loss": 0.6162, + "step": 3732 + }, + { + "epoch": 2.99, + "learning_rate": 8.056978233034571e-05, + "loss": 0.7674, + "step": 3733 + }, + { + "epoch": 2.99, + "learning_rate": 8.053777208706787e-05, + "loss": 0.7547, + "step": 3734 + }, + { + "epoch": 2.99, + "learning_rate": 8.050576184379001e-05, + "loss": 0.7031, + "step": 3735 + }, + { + "epoch": 2.99, + "learning_rate": 8.047375160051217e-05, + "loss": 0.8008, + "step": 3736 + }, + { + "epoch": 2.99, + "learning_rate": 8.044174135723432e-05, + "loss": 0.7118, + "step": 3737 + }, + { + "epoch": 2.99, + "learning_rate": 8.040973111395647e-05, + "loss": 0.8047, + "step": 3738 + }, + { + "epoch": 2.99, + "learning_rate": 8.037772087067861e-05, + "loss": 0.7383, + "step": 3739 + }, + { + "epoch": 2.99, + "learning_rate": 8.034571062740077e-05, + "loss": 0.7255, + "step": 3740 + }, + { + "epoch": 2.99, + "learning_rate": 8.031370038412292e-05, + "loss": 0.6616, + "step": 3741 + }, + { + "epoch": 2.99, + "learning_rate": 8.028169014084508e-05, + "loss": 0.8024, + "step": 3742 + }, + { + "epoch": 2.99, + "learning_rate": 8.024967989756723e-05, + "loss": 0.7322, + "step": 3743 + }, + { + "epoch": 3.0, + "learning_rate": 8.021766965428937e-05, + "loss": 0.7495, + "step": 3744 + }, + { + "epoch": 3.0, + "learning_rate": 8.018565941101153e-05, + "loss": 0.7516, + "step": 3745 + }, + { + "epoch": 3.0, + "learning_rate": 8.015364916773368e-05, + "loss": 0.6855, + "step": 3746 + }, + { + "epoch": 3.0, + "learning_rate": 8.012163892445584e-05, + "loss": 0.7152, + "step": 3747 + }, + { + "epoch": 3.0, + "learning_rate": 8.008962868117798e-05, + "loss": 0.7055, + "step": 3748 + }, + { + "epoch": 3.0, + "learning_rate": 8.005761843790013e-05, + "loss": 0.7009, + "step": 3749 + }, + { + "epoch": 3.0, + "learning_rate": 8.002560819462228e-05, + "loss": 0.7697, + "step": 3750 + }, + { + "epoch": 3.0, + "learning_rate": 7.999359795134444e-05, + "loss": 0.74, + "step": 3751 + }, + { + "epoch": 3.0, + "learning_rate": 7.996158770806658e-05, + "loss": 0.6802, + "step": 3752 + }, + { + "epoch": 3.0, + "learning_rate": 7.992957746478874e-05, + "loss": 0.6822, + "step": 3753 + }, + { + "epoch": 3.0, + "learning_rate": 7.989756722151089e-05, + "loss": 0.7562, + "step": 3754 + }, + { + "epoch": 3.0, + "learning_rate": 7.986555697823303e-05, + "loss": 0.6544, + "step": 3755 + }, + { + "epoch": 3.0, + "learning_rate": 7.98335467349552e-05, + "loss": 0.7546, + "step": 3756 + }, + { + "epoch": 3.01, + "learning_rate": 7.980153649167734e-05, + "loss": 0.7098, + "step": 3757 + }, + { + "epoch": 3.01, + "learning_rate": 7.97695262483995e-05, + "loss": 0.7313, + "step": 3758 + }, + { + "epoch": 3.01, + "learning_rate": 7.973751600512163e-05, + "loss": 0.6639, + "step": 3759 + }, + { + "epoch": 3.01, + "learning_rate": 7.970550576184379e-05, + "loss": 0.6337, + "step": 3760 + }, + { + "epoch": 3.01, + "learning_rate": 7.967349551856594e-05, + "loss": 0.7449, + "step": 3761 + }, + { + "epoch": 3.01, + "learning_rate": 7.96414852752881e-05, + "loss": 0.6779, + "step": 3762 + }, + { + "epoch": 3.01, + "learning_rate": 7.960947503201024e-05, + "loss": 0.637, + "step": 3763 + }, + { + "epoch": 3.01, + "learning_rate": 7.95774647887324e-05, + "loss": 0.735, + "step": 3764 + }, + { + "epoch": 3.01, + "learning_rate": 7.954545454545455e-05, + "loss": 0.7193, + "step": 3765 + }, + { + "epoch": 3.01, + "learning_rate": 7.95134443021767e-05, + "loss": 0.7259, + "step": 3766 + }, + { + "epoch": 3.01, + "learning_rate": 7.948143405889886e-05, + "loss": 0.6523, + "step": 3767 + }, + { + "epoch": 3.01, + "learning_rate": 7.9449423815621e-05, + "loss": 0.6978, + "step": 3768 + }, + { + "epoch": 3.02, + "learning_rate": 7.941741357234316e-05, + "loss": 0.7216, + "step": 3769 + }, + { + "epoch": 3.02, + "learning_rate": 7.93854033290653e-05, + "loss": 0.599, + "step": 3770 + }, + { + "epoch": 3.02, + "learning_rate": 7.935339308578746e-05, + "loss": 0.7014, + "step": 3771 + }, + { + "epoch": 3.02, + "learning_rate": 7.93213828425096e-05, + "loss": 0.6542, + "step": 3772 + }, + { + "epoch": 3.02, + "learning_rate": 7.928937259923176e-05, + "loss": 0.802, + "step": 3773 + }, + { + "epoch": 3.02, + "learning_rate": 7.925736235595391e-05, + "loss": 0.778, + "step": 3774 + }, + { + "epoch": 3.02, + "learning_rate": 7.922535211267607e-05, + "loss": 0.6697, + "step": 3775 + }, + { + "epoch": 3.02, + "learning_rate": 7.919334186939821e-05, + "loss": 0.7476, + "step": 3776 + }, + { + "epoch": 3.02, + "learning_rate": 7.916133162612036e-05, + "loss": 0.7107, + "step": 3777 + }, + { + "epoch": 3.02, + "learning_rate": 7.912932138284252e-05, + "loss": 0.7611, + "step": 3778 + }, + { + "epoch": 3.02, + "learning_rate": 7.909731113956467e-05, + "loss": 0.5822, + "step": 3779 + }, + { + "epoch": 3.02, + "learning_rate": 7.906530089628683e-05, + "loss": 0.6586, + "step": 3780 + }, + { + "epoch": 3.02, + "learning_rate": 7.903329065300896e-05, + "loss": 0.7613, + "step": 3781 + }, + { + "epoch": 3.03, + "learning_rate": 7.900128040973112e-05, + "loss": 0.7804, + "step": 3782 + }, + { + "epoch": 3.03, + "learning_rate": 7.896927016645326e-05, + "loss": 0.7141, + "step": 3783 + }, + { + "epoch": 3.03, + "learning_rate": 7.893725992317542e-05, + "loss": 0.7197, + "step": 3784 + }, + { + "epoch": 3.03, + "learning_rate": 7.890524967989757e-05, + "loss": 0.7279, + "step": 3785 + }, + { + "epoch": 3.03, + "learning_rate": 7.887323943661972e-05, + "loss": 0.7078, + "step": 3786 + }, + { + "epoch": 3.03, + "learning_rate": 7.884122919334188e-05, + "loss": 0.6666, + "step": 3787 + }, + { + "epoch": 3.03, + "learning_rate": 7.880921895006402e-05, + "loss": 0.8436, + "step": 3788 + }, + { + "epoch": 3.03, + "learning_rate": 7.877720870678618e-05, + "loss": 0.7693, + "step": 3789 + }, + { + "epoch": 3.03, + "learning_rate": 7.874519846350833e-05, + "loss": 0.6345, + "step": 3790 + }, + { + "epoch": 3.03, + "learning_rate": 7.871318822023049e-05, + "loss": 0.5932, + "step": 3791 + }, + { + "epoch": 3.03, + "learning_rate": 7.868117797695262e-05, + "loss": 0.7152, + "step": 3792 + }, + { + "epoch": 3.03, + "learning_rate": 7.864916773367478e-05, + "loss": 0.7746, + "step": 3793 + }, + { + "epoch": 3.04, + "learning_rate": 7.861715749039693e-05, + "loss": 0.6455, + "step": 3794 + }, + { + "epoch": 3.04, + "learning_rate": 7.858514724711909e-05, + "loss": 0.6861, + "step": 3795 + }, + { + "epoch": 3.04, + "learning_rate": 7.855313700384123e-05, + "loss": 0.719, + "step": 3796 + }, + { + "epoch": 3.04, + "learning_rate": 7.852112676056338e-05, + "loss": 0.7621, + "step": 3797 + }, + { + "epoch": 3.04, + "learning_rate": 7.848911651728554e-05, + "loss": 0.5869, + "step": 3798 + }, + { + "epoch": 3.04, + "learning_rate": 7.845710627400769e-05, + "loss": 0.7731, + "step": 3799 + }, + { + "epoch": 3.04, + "learning_rate": 7.842509603072985e-05, + "loss": 0.7095, + "step": 3800 + }, + { + "epoch": 3.04, + "learning_rate": 7.839308578745199e-05, + "loss": 0.8139, + "step": 3801 + }, + { + "epoch": 3.04, + "learning_rate": 7.836107554417414e-05, + "loss": 0.7516, + "step": 3802 + }, + { + "epoch": 3.04, + "learning_rate": 7.832906530089628e-05, + "loss": 0.6285, + "step": 3803 + }, + { + "epoch": 3.04, + "learning_rate": 7.829705505761844e-05, + "loss": 0.689, + "step": 3804 + }, + { + "epoch": 3.04, + "learning_rate": 7.826504481434059e-05, + "loss": 0.6985, + "step": 3805 + }, + { + "epoch": 3.04, + "learning_rate": 7.823303457106275e-05, + "loss": 0.7888, + "step": 3806 + }, + { + "epoch": 3.05, + "learning_rate": 7.82010243277849e-05, + "loss": 0.636, + "step": 3807 + }, + { + "epoch": 3.05, + "learning_rate": 7.816901408450704e-05, + "loss": 0.7139, + "step": 3808 + }, + { + "epoch": 3.05, + "learning_rate": 7.813700384122919e-05, + "loss": 0.6646, + "step": 3809 + }, + { + "epoch": 3.05, + "learning_rate": 7.810499359795135e-05, + "loss": 0.6906, + "step": 3810 + }, + { + "epoch": 3.05, + "learning_rate": 7.807298335467351e-05, + "loss": 0.6766, + "step": 3811 + }, + { + "epoch": 3.05, + "learning_rate": 7.804097311139565e-05, + "loss": 0.7609, + "step": 3812 + }, + { + "epoch": 3.05, + "learning_rate": 7.80089628681178e-05, + "loss": 0.6974, + "step": 3813 + }, + { + "epoch": 3.05, + "learning_rate": 7.797695262483995e-05, + "loss": 0.7329, + "step": 3814 + }, + { + "epoch": 3.05, + "learning_rate": 7.79449423815621e-05, + "loss": 0.6753, + "step": 3815 + }, + { + "epoch": 3.05, + "learning_rate": 7.791293213828425e-05, + "loss": 0.791, + "step": 3816 + }, + { + "epoch": 3.05, + "learning_rate": 7.788092189500641e-05, + "loss": 0.5685, + "step": 3817 + }, + { + "epoch": 3.05, + "learning_rate": 7.784891165172856e-05, + "loss": 0.7327, + "step": 3818 + }, + { + "epoch": 3.06, + "learning_rate": 7.78169014084507e-05, + "loss": 0.5643, + "step": 3819 + }, + { + "epoch": 3.06, + "learning_rate": 7.778489116517285e-05, + "loss": 0.6848, + "step": 3820 + }, + { + "epoch": 3.06, + "learning_rate": 7.775288092189501e-05, + "loss": 0.682, + "step": 3821 + }, + { + "epoch": 3.06, + "learning_rate": 7.772087067861716e-05, + "loss": 0.7658, + "step": 3822 + }, + { + "epoch": 3.06, + "learning_rate": 7.768886043533932e-05, + "loss": 0.7359, + "step": 3823 + }, + { + "epoch": 3.06, + "learning_rate": 7.765685019206146e-05, + "loss": 0.7371, + "step": 3824 + }, + { + "epoch": 3.06, + "learning_rate": 7.762483994878361e-05, + "loss": 0.8346, + "step": 3825 + }, + { + "epoch": 3.06, + "learning_rate": 7.759282970550577e-05, + "loss": 0.6247, + "step": 3826 + }, + { + "epoch": 3.06, + "learning_rate": 7.756081946222792e-05, + "loss": 0.7248, + "step": 3827 + }, + { + "epoch": 3.06, + "learning_rate": 7.752880921895008e-05, + "loss": 0.6926, + "step": 3828 + }, + { + "epoch": 3.06, + "learning_rate": 7.749679897567221e-05, + "loss": 0.8102, + "step": 3829 + }, + { + "epoch": 3.06, + "learning_rate": 7.746478873239437e-05, + "loss": 0.7796, + "step": 3830 + }, + { + "epoch": 3.06, + "learning_rate": 7.743277848911651e-05, + "loss": 0.6873, + "step": 3831 + }, + { + "epoch": 3.07, + "learning_rate": 7.740076824583867e-05, + "loss": 0.6759, + "step": 3832 + }, + { + "epoch": 3.07, + "learning_rate": 7.736875800256082e-05, + "loss": 0.7456, + "step": 3833 + }, + { + "epoch": 3.07, + "learning_rate": 7.733674775928298e-05, + "loss": 0.7496, + "step": 3834 + }, + { + "epoch": 3.07, + "learning_rate": 7.730473751600513e-05, + "loss": 0.7198, + "step": 3835 + }, + { + "epoch": 3.07, + "learning_rate": 7.727272727272727e-05, + "loss": 0.7493, + "step": 3836 + }, + { + "epoch": 3.07, + "learning_rate": 7.724071702944943e-05, + "loss": 0.7094, + "step": 3837 + }, + { + "epoch": 3.07, + "learning_rate": 7.720870678617158e-05, + "loss": 0.6554, + "step": 3838 + }, + { + "epoch": 3.07, + "learning_rate": 7.717669654289374e-05, + "loss": 0.7692, + "step": 3839 + }, + { + "epoch": 3.07, + "learning_rate": 7.714468629961587e-05, + "loss": 0.6579, + "step": 3840 + }, + { + "epoch": 3.07, + "learning_rate": 7.711267605633803e-05, + "loss": 0.7497, + "step": 3841 + }, + { + "epoch": 3.07, + "learning_rate": 7.708066581306018e-05, + "loss": 0.7548, + "step": 3842 + }, + { + "epoch": 3.07, + "learning_rate": 7.704865556978234e-05, + "loss": 0.8296, + "step": 3843 + }, + { + "epoch": 3.08, + "learning_rate": 7.701664532650448e-05, + "loss": 0.6827, + "step": 3844 + }, + { + "epoch": 3.08, + "learning_rate": 7.698463508322664e-05, + "loss": 0.7529, + "step": 3845 + }, + { + "epoch": 3.08, + "learning_rate": 7.695262483994879e-05, + "loss": 0.7063, + "step": 3846 + }, + { + "epoch": 3.08, + "learning_rate": 7.692061459667093e-05, + "loss": 0.7692, + "step": 3847 + }, + { + "epoch": 3.08, + "learning_rate": 7.68886043533931e-05, + "loss": 0.6763, + "step": 3848 + }, + { + "epoch": 3.08, + "learning_rate": 7.685659411011524e-05, + "loss": 0.6263, + "step": 3849 + }, + { + "epoch": 3.08, + "learning_rate": 7.68245838668374e-05, + "loss": 0.7003, + "step": 3850 + }, + { + "epoch": 3.08, + "learning_rate": 7.679257362355953e-05, + "loss": 0.6913, + "step": 3851 + }, + { + "epoch": 3.08, + "learning_rate": 7.676056338028169e-05, + "loss": 0.708, + "step": 3852 + }, + { + "epoch": 3.08, + "learning_rate": 7.672855313700384e-05, + "loss": 0.7072, + "step": 3853 + }, + { + "epoch": 3.08, + "learning_rate": 7.6696542893726e-05, + "loss": 0.5925, + "step": 3854 + }, + { + "epoch": 3.08, + "learning_rate": 7.666453265044815e-05, + "loss": 0.7152, + "step": 3855 + }, + { + "epoch": 3.08, + "learning_rate": 7.663252240717029e-05, + "loss": 0.6288, + "step": 3856 + }, + { + "epoch": 3.09, + "learning_rate": 7.660051216389245e-05, + "loss": 0.7122, + "step": 3857 + }, + { + "epoch": 3.09, + "learning_rate": 7.65685019206146e-05, + "loss": 0.6737, + "step": 3858 + }, + { + "epoch": 3.09, + "learning_rate": 7.653649167733676e-05, + "loss": 0.5596, + "step": 3859 + }, + { + "epoch": 3.09, + "learning_rate": 7.65044814340589e-05, + "loss": 0.7865, + "step": 3860 + }, + { + "epoch": 3.09, + "learning_rate": 7.647247119078106e-05, + "loss": 0.651, + "step": 3861 + }, + { + "epoch": 3.09, + "learning_rate": 7.64404609475032e-05, + "loss": 0.5874, + "step": 3862 + }, + { + "epoch": 3.09, + "learning_rate": 7.640845070422536e-05, + "loss": 0.6591, + "step": 3863 + }, + { + "epoch": 3.09, + "learning_rate": 7.63764404609475e-05, + "loss": 0.6755, + "step": 3864 + }, + { + "epoch": 3.09, + "learning_rate": 7.634443021766966e-05, + "loss": 0.6302, + "step": 3865 + }, + { + "epoch": 3.09, + "learning_rate": 7.631241997439181e-05, + "loss": 0.6733, + "step": 3866 + }, + { + "epoch": 3.09, + "learning_rate": 7.628040973111395e-05, + "loss": 0.6811, + "step": 3867 + }, + { + "epoch": 3.09, + "learning_rate": 7.624839948783611e-05, + "loss": 0.7985, + "step": 3868 + }, + { + "epoch": 3.1, + "learning_rate": 7.621638924455826e-05, + "loss": 0.7298, + "step": 3869 + }, + { + "epoch": 3.1, + "learning_rate": 7.618437900128042e-05, + "loss": 0.6692, + "step": 3870 + }, + { + "epoch": 3.1, + "learning_rate": 7.615236875800257e-05, + "loss": 0.7209, + "step": 3871 + }, + { + "epoch": 3.1, + "learning_rate": 7.612035851472471e-05, + "loss": 0.684, + "step": 3872 + }, + { + "epoch": 3.1, + "learning_rate": 7.608834827144686e-05, + "loss": 0.7312, + "step": 3873 + }, + { + "epoch": 3.1, + "learning_rate": 7.605633802816902e-05, + "loss": 0.6406, + "step": 3874 + }, + { + "epoch": 3.1, + "learning_rate": 7.602432778489116e-05, + "loss": 0.7578, + "step": 3875 + }, + { + "epoch": 3.1, + "learning_rate": 7.599231754161332e-05, + "loss": 0.6969, + "step": 3876 + }, + { + "epoch": 3.1, + "learning_rate": 7.596030729833547e-05, + "loss": 0.6936, + "step": 3877 + }, + { + "epoch": 3.1, + "learning_rate": 7.592829705505762e-05, + "loss": 0.6553, + "step": 3878 + }, + { + "epoch": 3.1, + "learning_rate": 7.589628681177978e-05, + "loss": 0.852, + "step": 3879 + }, + { + "epoch": 3.1, + "learning_rate": 7.586427656850192e-05, + "loss": 0.6684, + "step": 3880 + }, + { + "epoch": 3.1, + "learning_rate": 7.583226632522408e-05, + "loss": 0.7124, + "step": 3881 + }, + { + "epoch": 3.11, + "learning_rate": 7.580025608194623e-05, + "loss": 0.6617, + "step": 3882 + }, + { + "epoch": 3.11, + "learning_rate": 7.576824583866838e-05, + "loss": 0.6071, + "step": 3883 + }, + { + "epoch": 3.11, + "learning_rate": 7.573623559539052e-05, + "loss": 0.6147, + "step": 3884 + }, + { + "epoch": 3.11, + "learning_rate": 7.570422535211268e-05, + "loss": 0.6941, + "step": 3885 + }, + { + "epoch": 3.11, + "learning_rate": 7.567221510883483e-05, + "loss": 0.751, + "step": 3886 + }, + { + "epoch": 3.11, + "learning_rate": 7.564020486555699e-05, + "loss": 0.6525, + "step": 3887 + }, + { + "epoch": 3.11, + "learning_rate": 7.560819462227913e-05, + "loss": 0.6945, + "step": 3888 + }, + { + "epoch": 3.11, + "learning_rate": 7.557618437900128e-05, + "loss": 0.7396, + "step": 3889 + }, + { + "epoch": 3.11, + "learning_rate": 7.554417413572344e-05, + "loss": 0.6141, + "step": 3890 + }, + { + "epoch": 3.11, + "learning_rate": 7.551216389244559e-05, + "loss": 0.5816, + "step": 3891 + }, + { + "epoch": 3.11, + "learning_rate": 7.548015364916775e-05, + "loss": 0.779, + "step": 3892 + }, + { + "epoch": 3.11, + "learning_rate": 7.544814340588989e-05, + "loss": 0.6224, + "step": 3893 + }, + { + "epoch": 3.12, + "learning_rate": 7.541613316261204e-05, + "loss": 0.7992, + "step": 3894 + }, + { + "epoch": 3.12, + "learning_rate": 7.538412291933418e-05, + "loss": 0.5978, + "step": 3895 + }, + { + "epoch": 3.12, + "learning_rate": 7.535211267605634e-05, + "loss": 0.6513, + "step": 3896 + }, + { + "epoch": 3.12, + "learning_rate": 7.532010243277849e-05, + "loss": 0.7274, + "step": 3897 + }, + { + "epoch": 3.12, + "learning_rate": 7.528809218950065e-05, + "loss": 0.7398, + "step": 3898 + }, + { + "epoch": 3.12, + "learning_rate": 7.525608194622278e-05, + "loss": 0.734, + "step": 3899 + }, + { + "epoch": 3.12, + "learning_rate": 7.522407170294494e-05, + "loss": 0.7032, + "step": 3900 + }, + { + "epoch": 3.12, + "learning_rate": 7.51920614596671e-05, + "loss": 0.6029, + "step": 3901 + }, + { + "epoch": 3.12, + "learning_rate": 7.516005121638925e-05, + "loss": 0.6849, + "step": 3902 + }, + { + "epoch": 3.12, + "learning_rate": 7.512804097311141e-05, + "loss": 0.7357, + "step": 3903 + }, + { + "epoch": 3.12, + "learning_rate": 7.509603072983355e-05, + "loss": 0.7815, + "step": 3904 + }, + { + "epoch": 3.12, + "learning_rate": 7.50640204865557e-05, + "loss": 0.6738, + "step": 3905 + }, + { + "epoch": 3.12, + "learning_rate": 7.503201024327785e-05, + "loss": 0.5864, + "step": 3906 + }, + { + "epoch": 3.13, + "learning_rate": 7.500000000000001e-05, + "loss": 0.7187, + "step": 3907 + }, + { + "epoch": 3.13, + "learning_rate": 7.496798975672215e-05, + "loss": 0.675, + "step": 3908 + }, + { + "epoch": 3.13, + "learning_rate": 7.493597951344431e-05, + "loss": 0.7426, + "step": 3909 + }, + { + "epoch": 3.13, + "learning_rate": 7.490396927016645e-05, + "loss": 0.8085, + "step": 3910 + }, + { + "epoch": 3.13, + "learning_rate": 7.48719590268886e-05, + "loss": 0.6964, + "step": 3911 + }, + { + "epoch": 3.13, + "learning_rate": 7.483994878361075e-05, + "loss": 0.6115, + "step": 3912 + }, + { + "epoch": 3.13, + "learning_rate": 7.480793854033291e-05, + "loss": 0.4978, + "step": 3913 + }, + { + "epoch": 3.13, + "learning_rate": 7.477592829705507e-05, + "loss": 0.7665, + "step": 3914 + }, + { + "epoch": 3.13, + "learning_rate": 7.474391805377722e-05, + "loss": 0.7467, + "step": 3915 + }, + { + "epoch": 3.13, + "learning_rate": 7.471190781049936e-05, + "loss": 0.7245, + "step": 3916 + }, + { + "epoch": 3.13, + "learning_rate": 7.467989756722151e-05, + "loss": 0.7538, + "step": 3917 + }, + { + "epoch": 3.13, + "learning_rate": 7.464788732394367e-05, + "loss": 0.7665, + "step": 3918 + }, + { + "epoch": 3.14, + "learning_rate": 7.461587708066582e-05, + "loss": 0.6474, + "step": 3919 + }, + { + "epoch": 3.14, + "learning_rate": 7.458386683738798e-05, + "loss": 0.7168, + "step": 3920 + }, + { + "epoch": 3.14, + "learning_rate": 7.455185659411011e-05, + "loss": 0.8139, + "step": 3921 + }, + { + "epoch": 3.14, + "learning_rate": 7.451984635083227e-05, + "loss": 0.8239, + "step": 3922 + }, + { + "epoch": 3.14, + "learning_rate": 7.448783610755441e-05, + "loss": 0.7044, + "step": 3923 + }, + { + "epoch": 3.14, + "learning_rate": 7.445582586427657e-05, + "loss": 0.7448, + "step": 3924 + }, + { + "epoch": 3.14, + "learning_rate": 7.442381562099872e-05, + "loss": 0.8341, + "step": 3925 + }, + { + "epoch": 3.14, + "learning_rate": 7.439180537772087e-05, + "loss": 0.6223, + "step": 3926 + }, + { + "epoch": 3.14, + "learning_rate": 7.435979513444303e-05, + "loss": 0.6243, + "step": 3927 + }, + { + "epoch": 3.14, + "learning_rate": 7.432778489116517e-05, + "loss": 0.7108, + "step": 3928 + }, + { + "epoch": 3.14, + "learning_rate": 7.429577464788733e-05, + "loss": 0.7716, + "step": 3929 + }, + { + "epoch": 3.14, + "learning_rate": 7.426376440460948e-05, + "loss": 0.669, + "step": 3930 + }, + { + "epoch": 3.14, + "learning_rate": 7.423175416133164e-05, + "loss": 0.7142, + "step": 3931 + }, + { + "epoch": 3.15, + "learning_rate": 7.419974391805377e-05, + "loss": 0.7969, + "step": 3932 + }, + { + "epoch": 3.15, + "learning_rate": 7.416773367477593e-05, + "loss": 0.6874, + "step": 3933 + }, + { + "epoch": 3.15, + "learning_rate": 7.413572343149808e-05, + "loss": 0.631, + "step": 3934 + }, + { + "epoch": 3.15, + "learning_rate": 7.410371318822024e-05, + "loss": 0.6382, + "step": 3935 + }, + { + "epoch": 3.15, + "learning_rate": 7.407170294494238e-05, + "loss": 0.7322, + "step": 3936 + }, + { + "epoch": 3.15, + "learning_rate": 7.403969270166453e-05, + "loss": 0.5992, + "step": 3937 + }, + { + "epoch": 3.15, + "learning_rate": 7.400768245838669e-05, + "loss": 0.7708, + "step": 3938 + }, + { + "epoch": 3.15, + "learning_rate": 7.397567221510884e-05, + "loss": 0.7074, + "step": 3939 + }, + { + "epoch": 3.15, + "learning_rate": 7.3943661971831e-05, + "loss": 0.663, + "step": 3940 + }, + { + "epoch": 3.15, + "learning_rate": 7.391165172855314e-05, + "loss": 0.7439, + "step": 3941 + }, + { + "epoch": 3.15, + "learning_rate": 7.38796414852753e-05, + "loss": 0.6702, + "step": 3942 + }, + { + "epoch": 3.15, + "learning_rate": 7.384763124199743e-05, + "loss": 0.628, + "step": 3943 + }, + { + "epoch": 3.16, + "learning_rate": 7.38156209987196e-05, + "loss": 0.7106, + "step": 3944 + }, + { + "epoch": 3.16, + "learning_rate": 7.378361075544174e-05, + "loss": 0.7252, + "step": 3945 + }, + { + "epoch": 3.16, + "learning_rate": 7.37516005121639e-05, + "loss": 0.6537, + "step": 3946 + }, + { + "epoch": 3.16, + "learning_rate": 7.371959026888605e-05, + "loss": 0.6919, + "step": 3947 + }, + { + "epoch": 3.16, + "learning_rate": 7.368758002560819e-05, + "loss": 0.6448, + "step": 3948 + }, + { + "epoch": 3.16, + "learning_rate": 7.365556978233035e-05, + "loss": 0.7117, + "step": 3949 + }, + { + "epoch": 3.16, + "learning_rate": 7.36235595390525e-05, + "loss": 0.6988, + "step": 3950 + }, + { + "epoch": 3.16, + "learning_rate": 7.359154929577466e-05, + "loss": 0.6457, + "step": 3951 + }, + { + "epoch": 3.16, + "learning_rate": 7.35595390524968e-05, + "loss": 0.6315, + "step": 3952 + }, + { + "epoch": 3.16, + "learning_rate": 7.352752880921895e-05, + "loss": 0.7792, + "step": 3953 + }, + { + "epoch": 3.16, + "learning_rate": 7.34955185659411e-05, + "loss": 0.8546, + "step": 3954 + }, + { + "epoch": 3.16, + "learning_rate": 7.346350832266326e-05, + "loss": 0.7533, + "step": 3955 + }, + { + "epoch": 3.16, + "learning_rate": 7.34314980793854e-05, + "loss": 0.6927, + "step": 3956 + }, + { + "epoch": 3.17, + "learning_rate": 7.339948783610756e-05, + "loss": 0.6552, + "step": 3957 + }, + { + "epoch": 3.17, + "learning_rate": 7.336747759282971e-05, + "loss": 0.7095, + "step": 3958 + }, + { + "epoch": 3.17, + "learning_rate": 7.333546734955186e-05, + "loss": 0.8116, + "step": 3959 + }, + { + "epoch": 3.17, + "learning_rate": 7.330345710627401e-05, + "loss": 0.7088, + "step": 3960 + }, + { + "epoch": 3.17, + "learning_rate": 7.327144686299616e-05, + "loss": 0.7238, + "step": 3961 + }, + { + "epoch": 3.17, + "learning_rate": 7.323943661971832e-05, + "loss": 0.7483, + "step": 3962 + }, + { + "epoch": 3.17, + "learning_rate": 7.320742637644047e-05, + "loss": 0.6379, + "step": 3963 + }, + { + "epoch": 3.17, + "learning_rate": 7.317541613316261e-05, + "loss": 0.6307, + "step": 3964 + }, + { + "epoch": 3.17, + "learning_rate": 7.314340588988476e-05, + "loss": 0.7813, + "step": 3965 + }, + { + "epoch": 3.17, + "learning_rate": 7.311139564660692e-05, + "loss": 0.7655, + "step": 3966 + }, + { + "epoch": 3.17, + "learning_rate": 7.307938540332907e-05, + "loss": 0.698, + "step": 3967 + }, + { + "epoch": 3.17, + "learning_rate": 7.304737516005123e-05, + "loss": 0.666, + "step": 3968 + }, + { + "epoch": 3.18, + "learning_rate": 7.301536491677337e-05, + "loss": 0.708, + "step": 3969 + }, + { + "epoch": 3.18, + "learning_rate": 7.298335467349552e-05, + "loss": 0.6959, + "step": 3970 + }, + { + "epoch": 3.18, + "learning_rate": 7.295134443021768e-05, + "loss": 0.6826, + "step": 3971 + }, + { + "epoch": 3.18, + "learning_rate": 7.291933418693982e-05, + "loss": 0.6443, + "step": 3972 + }, + { + "epoch": 3.18, + "learning_rate": 7.288732394366198e-05, + "loss": 0.7073, + "step": 3973 + }, + { + "epoch": 3.18, + "learning_rate": 7.285531370038413e-05, + "loss": 0.6569, + "step": 3974 + }, + { + "epoch": 3.18, + "learning_rate": 7.282330345710628e-05, + "loss": 0.6656, + "step": 3975 + }, + { + "epoch": 3.18, + "learning_rate": 7.279129321382842e-05, + "loss": 0.656, + "step": 3976 + }, + { + "epoch": 3.18, + "learning_rate": 7.275928297055058e-05, + "loss": 0.7316, + "step": 3977 + }, + { + "epoch": 3.18, + "learning_rate": 7.272727272727273e-05, + "loss": 0.8091, + "step": 3978 + }, + { + "epoch": 3.18, + "learning_rate": 7.269526248399489e-05, + "loss": 0.6174, + "step": 3979 + }, + { + "epoch": 3.18, + "learning_rate": 7.266325224071703e-05, + "loss": 0.6677, + "step": 3980 + }, + { + "epoch": 3.18, + "learning_rate": 7.263124199743918e-05, + "loss": 0.6702, + "step": 3981 + }, + { + "epoch": 3.19, + "learning_rate": 7.259923175416134e-05, + "loss": 0.6666, + "step": 3982 + }, + { + "epoch": 3.19, + "learning_rate": 7.256722151088349e-05, + "loss": 0.6259, + "step": 3983 + }, + { + "epoch": 3.19, + "learning_rate": 7.253521126760565e-05, + "loss": 0.6483, + "step": 3984 + }, + { + "epoch": 3.19, + "learning_rate": 7.250320102432779e-05, + "loss": 0.6655, + "step": 3985 + }, + { + "epoch": 3.19, + "learning_rate": 7.247119078104994e-05, + "loss": 0.6544, + "step": 3986 + }, + { + "epoch": 3.19, + "learning_rate": 7.243918053777209e-05, + "loss": 0.6454, + "step": 3987 + }, + { + "epoch": 3.19, + "learning_rate": 7.240717029449424e-05, + "loss": 0.7302, + "step": 3988 + }, + { + "epoch": 3.19, + "learning_rate": 7.237516005121639e-05, + "loss": 0.6294, + "step": 3989 + }, + { + "epoch": 3.19, + "learning_rate": 7.234314980793855e-05, + "loss": 0.6582, + "step": 3990 + }, + { + "epoch": 3.19, + "learning_rate": 7.23111395646607e-05, + "loss": 0.7087, + "step": 3991 + }, + { + "epoch": 3.19, + "learning_rate": 7.227912932138284e-05, + "loss": 0.6124, + "step": 3992 + }, + { + "epoch": 3.19, + "learning_rate": 7.2247119078105e-05, + "loss": 0.7594, + "step": 3993 + }, + { + "epoch": 3.2, + "learning_rate": 7.221510883482715e-05, + "loss": 0.7401, + "step": 3994 + }, + { + "epoch": 3.2, + "learning_rate": 7.218309859154931e-05, + "loss": 0.6867, + "step": 3995 + }, + { + "epoch": 3.2, + "learning_rate": 7.215108834827144e-05, + "loss": 0.8066, + "step": 3996 + }, + { + "epoch": 3.2, + "learning_rate": 7.21190781049936e-05, + "loss": 0.7565, + "step": 3997 + }, + { + "epoch": 3.2, + "learning_rate": 7.208706786171575e-05, + "loss": 0.7574, + "step": 3998 + }, + { + "epoch": 3.2, + "learning_rate": 7.205505761843791e-05, + "loss": 0.6164, + "step": 3999 + }, + { + "epoch": 3.2, + "learning_rate": 7.202304737516005e-05, + "loss": 0.8545, + "step": 4000 + }, + { + "epoch": 3.2, + "learning_rate": 7.199103713188221e-05, + "loss": 0.7048, + "step": 4001 + }, + { + "epoch": 3.2, + "learning_rate": 7.195902688860435e-05, + "loss": 0.7342, + "step": 4002 + }, + { + "epoch": 3.2, + "learning_rate": 7.19270166453265e-05, + "loss": 0.7089, + "step": 4003 + }, + { + "epoch": 3.2, + "learning_rate": 7.189500640204867e-05, + "loss": 0.6058, + "step": 4004 + }, + { + "epoch": 3.2, + "learning_rate": 7.186299615877081e-05, + "loss": 0.6454, + "step": 4005 + }, + { + "epoch": 3.2, + "learning_rate": 7.183098591549297e-05, + "loss": 0.667, + "step": 4006 + }, + { + "epoch": 3.21, + "learning_rate": 7.17989756722151e-05, + "loss": 0.7388, + "step": 4007 + }, + { + "epoch": 3.21, + "learning_rate": 7.176696542893726e-05, + "loss": 0.6944, + "step": 4008 + }, + { + "epoch": 3.21, + "learning_rate": 7.173495518565941e-05, + "loss": 0.7652, + "step": 4009 + }, + { + "epoch": 3.21, + "learning_rate": 7.170294494238157e-05, + "loss": 0.5636, + "step": 4010 + }, + { + "epoch": 3.21, + "learning_rate": 7.167093469910372e-05, + "loss": 0.6254, + "step": 4011 + }, + { + "epoch": 3.21, + "learning_rate": 7.163892445582588e-05, + "loss": 0.6377, + "step": 4012 + }, + { + "epoch": 3.21, + "learning_rate": 7.160691421254801e-05, + "loss": 0.6997, + "step": 4013 + }, + { + "epoch": 3.21, + "learning_rate": 7.157490396927017e-05, + "loss": 0.6629, + "step": 4014 + }, + { + "epoch": 3.21, + "learning_rate": 7.154289372599232e-05, + "loss": 0.7809, + "step": 4015 + }, + { + "epoch": 3.21, + "learning_rate": 7.151088348271447e-05, + "loss": 0.6747, + "step": 4016 + }, + { + "epoch": 3.21, + "learning_rate": 7.147887323943662e-05, + "loss": 0.6807, + "step": 4017 + }, + { + "epoch": 3.21, + "learning_rate": 7.144686299615877e-05, + "loss": 0.7186, + "step": 4018 + }, + { + "epoch": 3.22, + "learning_rate": 7.141485275288093e-05, + "loss": 0.7022, + "step": 4019 + }, + { + "epoch": 3.22, + "learning_rate": 7.138284250960307e-05, + "loss": 0.6441, + "step": 4020 + }, + { + "epoch": 3.22, + "learning_rate": 7.135083226632523e-05, + "loss": 0.7646, + "step": 4021 + }, + { + "epoch": 3.22, + "learning_rate": 7.131882202304738e-05, + "loss": 0.5846, + "step": 4022 + }, + { + "epoch": 3.22, + "learning_rate": 7.128681177976953e-05, + "loss": 0.6752, + "step": 4023 + }, + { + "epoch": 3.22, + "learning_rate": 7.125480153649167e-05, + "loss": 0.8251, + "step": 4024 + }, + { + "epoch": 3.22, + "learning_rate": 7.122279129321383e-05, + "loss": 0.7023, + "step": 4025 + }, + { + "epoch": 3.22, + "learning_rate": 7.119078104993598e-05, + "loss": 0.6423, + "step": 4026 + }, + { + "epoch": 3.22, + "learning_rate": 7.115877080665814e-05, + "loss": 0.6532, + "step": 4027 + }, + { + "epoch": 3.22, + "learning_rate": 7.112676056338028e-05, + "loss": 0.6383, + "step": 4028 + }, + { + "epoch": 3.22, + "learning_rate": 7.109475032010243e-05, + "loss": 0.7451, + "step": 4029 + }, + { + "epoch": 3.22, + "learning_rate": 7.106274007682459e-05, + "loss": 0.7421, + "step": 4030 + }, + { + "epoch": 3.22, + "learning_rate": 7.103072983354674e-05, + "loss": 0.7432, + "step": 4031 + }, + { + "epoch": 3.23, + "learning_rate": 7.09987195902689e-05, + "loss": 0.6473, + "step": 4032 + }, + { + "epoch": 3.23, + "learning_rate": 7.096670934699104e-05, + "loss": 0.5982, + "step": 4033 + }, + { + "epoch": 3.23, + "learning_rate": 7.093469910371319e-05, + "loss": 0.6696, + "step": 4034 + }, + { + "epoch": 3.23, + "learning_rate": 7.090268886043533e-05, + "loss": 0.7402, + "step": 4035 + }, + { + "epoch": 3.23, + "learning_rate": 7.08706786171575e-05, + "loss": 0.6765, + "step": 4036 + }, + { + "epoch": 3.23, + "learning_rate": 7.083866837387964e-05, + "loss": 0.6614, + "step": 4037 + }, + { + "epoch": 3.23, + "learning_rate": 7.08066581306018e-05, + "loss": 0.7413, + "step": 4038 + }, + { + "epoch": 3.23, + "learning_rate": 7.077464788732395e-05, + "loss": 0.5844, + "step": 4039 + }, + { + "epoch": 3.23, + "learning_rate": 7.074263764404609e-05, + "loss": 0.668, + "step": 4040 + }, + { + "epoch": 3.23, + "learning_rate": 7.071062740076825e-05, + "loss": 0.6593, + "step": 4041 + }, + { + "epoch": 3.23, + "learning_rate": 7.06786171574904e-05, + "loss": 0.6445, + "step": 4042 + }, + { + "epoch": 3.23, + "learning_rate": 7.064660691421256e-05, + "loss": 0.7949, + "step": 4043 + }, + { + "epoch": 3.24, + "learning_rate": 7.06145966709347e-05, + "loss": 0.5542, + "step": 4044 + }, + { + "epoch": 3.24, + "learning_rate": 7.058258642765685e-05, + "loss": 0.7106, + "step": 4045 + }, + { + "epoch": 3.24, + "learning_rate": 7.0550576184379e-05, + "loss": 0.7065, + "step": 4046 + }, + { + "epoch": 3.24, + "learning_rate": 7.051856594110116e-05, + "loss": 0.6263, + "step": 4047 + }, + { + "epoch": 3.24, + "learning_rate": 7.04865556978233e-05, + "loss": 0.658, + "step": 4048 + }, + { + "epoch": 3.24, + "learning_rate": 7.045454545454546e-05, + "loss": 0.5963, + "step": 4049 + }, + { + "epoch": 3.24, + "learning_rate": 7.042253521126761e-05, + "loss": 0.6989, + "step": 4050 + }, + { + "epoch": 3.24, + "learning_rate": 7.039052496798976e-05, + "loss": 0.6599, + "step": 4051 + }, + { + "epoch": 3.24, + "learning_rate": 7.035851472471192e-05, + "loss": 0.8016, + "step": 4052 + }, + { + "epoch": 3.24, + "learning_rate": 7.032650448143406e-05, + "loss": 0.589, + "step": 4053 + }, + { + "epoch": 3.24, + "learning_rate": 7.029449423815622e-05, + "loss": 0.7169, + "step": 4054 + }, + { + "epoch": 3.24, + "learning_rate": 7.026248399487837e-05, + "loss": 0.8287, + "step": 4055 + }, + { + "epoch": 3.24, + "learning_rate": 7.023047375160051e-05, + "loss": 0.6686, + "step": 4056 + }, + { + "epoch": 3.25, + "learning_rate": 7.019846350832266e-05, + "loss": 0.6445, + "step": 4057 + }, + { + "epoch": 3.25, + "learning_rate": 7.016645326504482e-05, + "loss": 0.6958, + "step": 4058 + }, + { + "epoch": 3.25, + "learning_rate": 7.013444302176697e-05, + "loss": 0.8017, + "step": 4059 + }, + { + "epoch": 3.25, + "learning_rate": 7.010243277848913e-05, + "loss": 0.673, + "step": 4060 + }, + { + "epoch": 3.25, + "learning_rate": 7.007042253521127e-05, + "loss": 0.7549, + "step": 4061 + }, + { + "epoch": 3.25, + "learning_rate": 7.003841229193342e-05, + "loss": 0.6873, + "step": 4062 + }, + { + "epoch": 3.25, + "learning_rate": 7.000640204865558e-05, + "loss": 0.7235, + "step": 4063 + }, + { + "epoch": 3.25, + "learning_rate": 6.997439180537772e-05, + "loss": 0.7815, + "step": 4064 + }, + { + "epoch": 3.25, + "learning_rate": 6.994238156209988e-05, + "loss": 0.8735, + "step": 4065 + }, + { + "epoch": 3.25, + "learning_rate": 6.991037131882202e-05, + "loss": 0.7173, + "step": 4066 + }, + { + "epoch": 3.25, + "learning_rate": 6.987836107554418e-05, + "loss": 0.6896, + "step": 4067 + }, + { + "epoch": 3.25, + "learning_rate": 6.984635083226632e-05, + "loss": 0.7077, + "step": 4068 + }, + { + "epoch": 3.26, + "learning_rate": 6.981434058898848e-05, + "loss": 0.6673, + "step": 4069 + }, + { + "epoch": 3.26, + "learning_rate": 6.978233034571063e-05, + "loss": 0.6717, + "step": 4070 + }, + { + "epoch": 3.26, + "learning_rate": 6.975032010243279e-05, + "loss": 0.6405, + "step": 4071 + }, + { + "epoch": 3.26, + "learning_rate": 6.971830985915493e-05, + "loss": 0.6583, + "step": 4072 + }, + { + "epoch": 3.26, + "learning_rate": 6.968629961587708e-05, + "loss": 0.6751, + "step": 4073 + }, + { + "epoch": 3.26, + "learning_rate": 6.965428937259924e-05, + "loss": 0.7041, + "step": 4074 + }, + { + "epoch": 3.26, + "learning_rate": 6.962227912932139e-05, + "loss": 0.5831, + "step": 4075 + }, + { + "epoch": 3.26, + "learning_rate": 6.959026888604355e-05, + "loss": 0.6955, + "step": 4076 + }, + { + "epoch": 3.26, + "learning_rate": 6.955825864276568e-05, + "loss": 0.6883, + "step": 4077 + }, + { + "epoch": 3.26, + "learning_rate": 6.952624839948784e-05, + "loss": 0.7435, + "step": 4078 + }, + { + "epoch": 3.26, + "learning_rate": 6.949423815620999e-05, + "loss": 0.7523, + "step": 4079 + }, + { + "epoch": 3.26, + "learning_rate": 6.946222791293215e-05, + "loss": 0.5529, + "step": 4080 + }, + { + "epoch": 3.26, + "learning_rate": 6.943021766965429e-05, + "loss": 0.7944, + "step": 4081 + }, + { + "epoch": 3.27, + "learning_rate": 6.939820742637645e-05, + "loss": 0.726, + "step": 4082 + }, + { + "epoch": 3.27, + "learning_rate": 6.93661971830986e-05, + "loss": 0.6664, + "step": 4083 + }, + { + "epoch": 3.27, + "learning_rate": 6.933418693982074e-05, + "loss": 0.6828, + "step": 4084 + }, + { + "epoch": 3.27, + "learning_rate": 6.93021766965429e-05, + "loss": 0.8541, + "step": 4085 + }, + { + "epoch": 3.27, + "learning_rate": 6.927016645326505e-05, + "loss": 0.7191, + "step": 4086 + }, + { + "epoch": 3.27, + "learning_rate": 6.923815620998721e-05, + "loss": 0.7801, + "step": 4087 + }, + { + "epoch": 3.27, + "learning_rate": 6.920614596670934e-05, + "loss": 0.6431, + "step": 4088 + }, + { + "epoch": 3.27, + "learning_rate": 6.91741357234315e-05, + "loss": 0.6282, + "step": 4089 + }, + { + "epoch": 3.27, + "learning_rate": 6.914212548015365e-05, + "loss": 0.7305, + "step": 4090 + }, + { + "epoch": 3.27, + "learning_rate": 6.911011523687581e-05, + "loss": 0.6772, + "step": 4091 + }, + { + "epoch": 3.27, + "learning_rate": 6.907810499359795e-05, + "loss": 0.6481, + "step": 4092 + }, + { + "epoch": 3.27, + "learning_rate": 6.90460947503201e-05, + "loss": 0.6414, + "step": 4093 + }, + { + "epoch": 3.28, + "learning_rate": 6.901408450704226e-05, + "loss": 0.8044, + "step": 4094 + }, + { + "epoch": 3.28, + "learning_rate": 6.89820742637644e-05, + "loss": 0.7181, + "step": 4095 + }, + { + "epoch": 3.28, + "learning_rate": 6.895006402048657e-05, + "loss": 0.6738, + "step": 4096 + }, + { + "epoch": 3.28, + "learning_rate": 6.891805377720871e-05, + "loss": 0.6641, + "step": 4097 + }, + { + "epoch": 3.28, + "learning_rate": 6.888604353393087e-05, + "loss": 0.8143, + "step": 4098 + }, + { + "epoch": 3.28, + "learning_rate": 6.8854033290653e-05, + "loss": 0.7516, + "step": 4099 + }, + { + "epoch": 3.28, + "learning_rate": 6.882202304737516e-05, + "loss": 0.752, + "step": 4100 + }, + { + "epoch": 3.28, + "learning_rate": 6.879001280409731e-05, + "loss": 0.7657, + "step": 4101 + }, + { + "epoch": 3.28, + "learning_rate": 6.875800256081947e-05, + "loss": 0.8042, + "step": 4102 + }, + { + "epoch": 3.28, + "learning_rate": 6.872599231754162e-05, + "loss": 0.7716, + "step": 4103 + }, + { + "epoch": 3.28, + "learning_rate": 6.869398207426376e-05, + "loss": 0.6802, + "step": 4104 + }, + { + "epoch": 3.28, + "learning_rate": 6.866197183098591e-05, + "loss": 0.8591, + "step": 4105 + }, + { + "epoch": 3.28, + "learning_rate": 6.862996158770807e-05, + "loss": 0.711, + "step": 4106 + }, + { + "epoch": 3.29, + "learning_rate": 6.859795134443022e-05, + "loss": 0.6691, + "step": 4107 + }, + { + "epoch": 3.29, + "learning_rate": 6.856594110115238e-05, + "loss": 0.6871, + "step": 4108 + }, + { + "epoch": 3.29, + "learning_rate": 6.853393085787452e-05, + "loss": 0.7233, + "step": 4109 + }, + { + "epoch": 3.29, + "learning_rate": 6.850192061459667e-05, + "loss": 0.7697, + "step": 4110 + }, + { + "epoch": 3.29, + "learning_rate": 6.846991037131883e-05, + "loss": 0.6935, + "step": 4111 + }, + { + "epoch": 3.29, + "learning_rate": 6.843790012804097e-05, + "loss": 0.6202, + "step": 4112 + }, + { + "epoch": 3.29, + "learning_rate": 6.840588988476313e-05, + "loss": 0.7316, + "step": 4113 + }, + { + "epoch": 3.29, + "learning_rate": 6.837387964148528e-05, + "loss": 0.6224, + "step": 4114 + }, + { + "epoch": 3.29, + "learning_rate": 6.834186939820743e-05, + "loss": 0.7215, + "step": 4115 + }, + { + "epoch": 3.29, + "learning_rate": 6.830985915492957e-05, + "loss": 0.8197, + "step": 4116 + }, + { + "epoch": 3.29, + "learning_rate": 6.827784891165173e-05, + "loss": 0.6945, + "step": 4117 + }, + { + "epoch": 3.29, + "learning_rate": 6.824583866837388e-05, + "loss": 0.5186, + "step": 4118 + }, + { + "epoch": 3.3, + "learning_rate": 6.821382842509604e-05, + "loss": 0.5521, + "step": 4119 + }, + { + "epoch": 3.3, + "learning_rate": 6.818181818181818e-05, + "loss": 0.5786, + "step": 4120 + }, + { + "epoch": 3.3, + "learning_rate": 6.814980793854033e-05, + "loss": 0.788, + "step": 4121 + }, + { + "epoch": 3.3, + "learning_rate": 6.811779769526249e-05, + "loss": 0.8562, + "step": 4122 + }, + { + "epoch": 3.3, + "learning_rate": 6.808578745198464e-05, + "loss": 0.7054, + "step": 4123 + }, + { + "epoch": 3.3, + "learning_rate": 6.80537772087068e-05, + "loss": 0.7037, + "step": 4124 + }, + { + "epoch": 3.3, + "learning_rate": 6.802176696542894e-05, + "loss": 0.7512, + "step": 4125 + }, + { + "epoch": 3.3, + "learning_rate": 6.798975672215109e-05, + "loss": 0.7021, + "step": 4126 + }, + { + "epoch": 3.3, + "learning_rate": 6.795774647887324e-05, + "loss": 0.7644, + "step": 4127 + }, + { + "epoch": 3.3, + "learning_rate": 6.79257362355954e-05, + "loss": 0.6773, + "step": 4128 + }, + { + "epoch": 3.3, + "learning_rate": 6.789372599231754e-05, + "loss": 0.7256, + "step": 4129 + }, + { + "epoch": 3.3, + "learning_rate": 6.78617157490397e-05, + "loss": 0.6771, + "step": 4130 + }, + { + "epoch": 3.3, + "learning_rate": 6.782970550576185e-05, + "loss": 0.612, + "step": 4131 + }, + { + "epoch": 3.31, + "learning_rate": 6.7797695262484e-05, + "loss": 0.6913, + "step": 4132 + }, + { + "epoch": 3.31, + "learning_rate": 6.776568501920615e-05, + "loss": 0.7272, + "step": 4133 + }, + { + "epoch": 3.31, + "learning_rate": 6.77336747759283e-05, + "loss": 0.6781, + "step": 4134 + }, + { + "epoch": 3.31, + "learning_rate": 6.770166453265046e-05, + "loss": 0.834, + "step": 4135 + }, + { + "epoch": 3.31, + "learning_rate": 6.766965428937259e-05, + "loss": 0.6203, + "step": 4136 + }, + { + "epoch": 3.31, + "learning_rate": 6.763764404609475e-05, + "loss": 0.6205, + "step": 4137 + }, + { + "epoch": 3.31, + "learning_rate": 6.76056338028169e-05, + "loss": 0.6366, + "step": 4138 + }, + { + "epoch": 3.31, + "learning_rate": 6.757362355953906e-05, + "loss": 0.6332, + "step": 4139 + }, + { + "epoch": 3.31, + "learning_rate": 6.75416133162612e-05, + "loss": 0.6586, + "step": 4140 + }, + { + "epoch": 3.31, + "learning_rate": 6.750960307298336e-05, + "loss": 0.6773, + "step": 4141 + }, + { + "epoch": 3.31, + "learning_rate": 6.747759282970551e-05, + "loss": 0.7366, + "step": 4142 + }, + { + "epoch": 3.31, + "learning_rate": 6.744558258642766e-05, + "loss": 0.7146, + "step": 4143 + }, + { + "epoch": 3.32, + "learning_rate": 6.741357234314982e-05, + "loss": 0.9265, + "step": 4144 + }, + { + "epoch": 3.32, + "learning_rate": 6.738156209987196e-05, + "loss": 0.666, + "step": 4145 + }, + { + "epoch": 3.32, + "learning_rate": 6.734955185659412e-05, + "loss": 0.8244, + "step": 4146 + }, + { + "epoch": 3.32, + "learning_rate": 6.731754161331625e-05, + "loss": 0.7909, + "step": 4147 + }, + { + "epoch": 3.32, + "learning_rate": 6.728553137003841e-05, + "loss": 0.6953, + "step": 4148 + }, + { + "epoch": 3.32, + "learning_rate": 6.725352112676056e-05, + "loss": 0.7129, + "step": 4149 + }, + { + "epoch": 3.32, + "learning_rate": 6.722151088348272e-05, + "loss": 0.6658, + "step": 4150 + }, + { + "epoch": 3.32, + "learning_rate": 6.718950064020487e-05, + "loss": 0.7012, + "step": 4151 + }, + { + "epoch": 3.32, + "learning_rate": 6.715749039692703e-05, + "loss": 0.7427, + "step": 4152 + }, + { + "epoch": 3.32, + "learning_rate": 6.712548015364917e-05, + "loss": 0.6988, + "step": 4153 + }, + { + "epoch": 3.32, + "learning_rate": 6.709346991037132e-05, + "loss": 0.7723, + "step": 4154 + }, + { + "epoch": 3.32, + "learning_rate": 6.706145966709348e-05, + "loss": 0.7739, + "step": 4155 + }, + { + "epoch": 3.32, + "learning_rate": 6.702944942381563e-05, + "loss": 0.7877, + "step": 4156 + }, + { + "epoch": 3.33, + "learning_rate": 6.699743918053778e-05, + "loss": 0.6356, + "step": 4157 + }, + { + "epoch": 3.33, + "learning_rate": 6.696542893725992e-05, + "loss": 0.6921, + "step": 4158 + }, + { + "epoch": 3.33, + "learning_rate": 6.693341869398208e-05, + "loss": 0.6771, + "step": 4159 + }, + { + "epoch": 3.33, + "learning_rate": 6.690140845070422e-05, + "loss": 0.7159, + "step": 4160 + }, + { + "epoch": 3.33, + "learning_rate": 6.686939820742638e-05, + "loss": 0.6351, + "step": 4161 + }, + { + "epoch": 3.33, + "learning_rate": 6.683738796414853e-05, + "loss": 0.7888, + "step": 4162 + }, + { + "epoch": 3.33, + "learning_rate": 6.680537772087068e-05, + "loss": 0.8826, + "step": 4163 + }, + { + "epoch": 3.33, + "learning_rate": 6.677336747759284e-05, + "loss": 0.7291, + "step": 4164 + }, + { + "epoch": 3.33, + "learning_rate": 6.674135723431498e-05, + "loss": 0.7871, + "step": 4165 + }, + { + "epoch": 3.33, + "learning_rate": 6.670934699103714e-05, + "loss": 0.7448, + "step": 4166 + }, + { + "epoch": 3.33, + "learning_rate": 6.667733674775929e-05, + "loss": 0.6442, + "step": 4167 + }, + { + "epoch": 3.33, + "learning_rate": 6.664532650448145e-05, + "loss": 0.7476, + "step": 4168 + }, + { + "epoch": 3.34, + "learning_rate": 6.661331626120358e-05, + "loss": 0.7438, + "step": 4169 + }, + { + "epoch": 3.34, + "learning_rate": 6.658130601792574e-05, + "loss": 0.7108, + "step": 4170 + }, + { + "epoch": 3.34, + "learning_rate": 6.654929577464789e-05, + "loss": 0.7285, + "step": 4171 + }, + { + "epoch": 3.34, + "learning_rate": 6.651728553137005e-05, + "loss": 0.6494, + "step": 4172 + }, + { + "epoch": 3.34, + "learning_rate": 6.648527528809219e-05, + "loss": 0.6914, + "step": 4173 + }, + { + "epoch": 3.34, + "learning_rate": 6.645326504481434e-05, + "loss": 0.7461, + "step": 4174 + }, + { + "epoch": 3.34, + "learning_rate": 6.64212548015365e-05, + "loss": 0.7641, + "step": 4175 + }, + { + "epoch": 3.34, + "learning_rate": 6.638924455825864e-05, + "loss": 0.5777, + "step": 4176 + }, + { + "epoch": 3.34, + "learning_rate": 6.63572343149808e-05, + "loss": 0.6993, + "step": 4177 + }, + { + "epoch": 3.34, + "learning_rate": 6.632522407170295e-05, + "loss": 0.7002, + "step": 4178 + }, + { + "epoch": 3.34, + "learning_rate": 6.629321382842511e-05, + "loss": 0.8369, + "step": 4179 + }, + { + "epoch": 3.34, + "learning_rate": 6.626120358514724e-05, + "loss": 0.6883, + "step": 4180 + }, + { + "epoch": 3.34, + "learning_rate": 6.62291933418694e-05, + "loss": 0.603, + "step": 4181 + }, + { + "epoch": 3.35, + "learning_rate": 6.619718309859155e-05, + "loss": 0.7024, + "step": 4182 + }, + { + "epoch": 3.35, + "learning_rate": 6.616517285531371e-05, + "loss": 0.6882, + "step": 4183 + }, + { + "epoch": 3.35, + "learning_rate": 6.613316261203586e-05, + "loss": 0.8854, + "step": 4184 + }, + { + "epoch": 3.35, + "learning_rate": 6.6101152368758e-05, + "loss": 0.7489, + "step": 4185 + }, + { + "epoch": 3.35, + "learning_rate": 6.606914212548016e-05, + "loss": 0.733, + "step": 4186 + }, + { + "epoch": 3.35, + "learning_rate": 6.603713188220231e-05, + "loss": 0.6671, + "step": 4187 + }, + { + "epoch": 3.35, + "learning_rate": 6.600512163892447e-05, + "loss": 0.787, + "step": 4188 + }, + { + "epoch": 3.35, + "learning_rate": 6.597311139564661e-05, + "loss": 0.6873, + "step": 4189 + }, + { + "epoch": 3.35, + "learning_rate": 6.594110115236876e-05, + "loss": 0.8123, + "step": 4190 + }, + { + "epoch": 3.35, + "learning_rate": 6.59090909090909e-05, + "loss": 0.6701, + "step": 4191 + }, + { + "epoch": 3.35, + "learning_rate": 6.587708066581307e-05, + "loss": 0.6187, + "step": 4192 + }, + { + "epoch": 3.35, + "learning_rate": 6.584507042253521e-05, + "loss": 0.6348, + "step": 4193 + }, + { + "epoch": 3.36, + "learning_rate": 6.581306017925737e-05, + "loss": 0.685, + "step": 4194 + }, + { + "epoch": 3.36, + "learning_rate": 6.578104993597952e-05, + "loss": 0.7194, + "step": 4195 + }, + { + "epoch": 3.36, + "learning_rate": 6.574903969270166e-05, + "loss": 0.6414, + "step": 4196 + }, + { + "epoch": 3.36, + "learning_rate": 6.571702944942381e-05, + "loss": 0.6913, + "step": 4197 + }, + { + "epoch": 3.36, + "learning_rate": 6.568501920614597e-05, + "loss": 0.722, + "step": 4198 + }, + { + "epoch": 3.36, + "learning_rate": 6.565300896286813e-05, + "loss": 0.7322, + "step": 4199 + }, + { + "epoch": 3.36, + "learning_rate": 6.562099871959028e-05, + "loss": 0.7205, + "step": 4200 + }, + { + "epoch": 3.36, + "learning_rate": 6.558898847631242e-05, + "loss": 0.802, + "step": 4201 + }, + { + "epoch": 3.36, + "learning_rate": 6.555697823303457e-05, + "loss": 0.625, + "step": 4202 + }, + { + "epoch": 3.36, + "learning_rate": 6.552496798975673e-05, + "loss": 0.5546, + "step": 4203 + }, + { + "epoch": 3.36, + "learning_rate": 6.549295774647887e-05, + "loss": 0.7215, + "step": 4204 + }, + { + "epoch": 3.36, + "learning_rate": 6.546094750320103e-05, + "loss": 0.6653, + "step": 4205 + }, + { + "epoch": 3.36, + "learning_rate": 6.542893725992317e-05, + "loss": 0.65, + "step": 4206 + }, + { + "epoch": 3.37, + "learning_rate": 6.539692701664533e-05, + "loss": 0.6912, + "step": 4207 + }, + { + "epoch": 3.37, + "learning_rate": 6.536491677336747e-05, + "loss": 0.842, + "step": 4208 + }, + { + "epoch": 3.37, + "learning_rate": 6.533290653008963e-05, + "loss": 0.6956, + "step": 4209 + }, + { + "epoch": 3.37, + "learning_rate": 6.530089628681178e-05, + "loss": 0.7455, + "step": 4210 + }, + { + "epoch": 3.37, + "learning_rate": 6.526888604353394e-05, + "loss": 0.6202, + "step": 4211 + }, + { + "epoch": 3.37, + "learning_rate": 6.523687580025609e-05, + "loss": 0.6646, + "step": 4212 + }, + { + "epoch": 3.37, + "learning_rate": 6.520486555697823e-05, + "loss": 0.6089, + "step": 4213 + }, + { + "epoch": 3.37, + "learning_rate": 6.517285531370039e-05, + "loss": 0.6428, + "step": 4214 + }, + { + "epoch": 3.37, + "learning_rate": 6.514084507042254e-05, + "loss": 0.7097, + "step": 4215 + }, + { + "epoch": 3.37, + "learning_rate": 6.51088348271447e-05, + "loss": 0.6738, + "step": 4216 + }, + { + "epoch": 3.37, + "learning_rate": 6.507682458386683e-05, + "loss": 0.7185, + "step": 4217 + }, + { + "epoch": 3.37, + "learning_rate": 6.504481434058899e-05, + "loss": 0.5816, + "step": 4218 + }, + { + "epoch": 3.38, + "learning_rate": 6.501280409731114e-05, + "loss": 0.7458, + "step": 4219 + }, + { + "epoch": 3.38, + "learning_rate": 6.49807938540333e-05, + "loss": 0.7311, + "step": 4220 + }, + { + "epoch": 3.38, + "learning_rate": 6.494878361075544e-05, + "loss": 0.7029, + "step": 4221 + }, + { + "epoch": 3.38, + "learning_rate": 6.49167733674776e-05, + "loss": 0.7268, + "step": 4222 + }, + { + "epoch": 3.38, + "learning_rate": 6.488476312419975e-05, + "loss": 0.754, + "step": 4223 + }, + { + "epoch": 3.38, + "learning_rate": 6.48527528809219e-05, + "loss": 0.684, + "step": 4224 + }, + { + "epoch": 3.38, + "learning_rate": 6.482074263764405e-05, + "loss": 0.6252, + "step": 4225 + }, + { + "epoch": 3.38, + "learning_rate": 6.47887323943662e-05, + "loss": 0.5758, + "step": 4226 + }, + { + "epoch": 3.38, + "learning_rate": 6.475672215108836e-05, + "loss": 0.7831, + "step": 4227 + }, + { + "epoch": 3.38, + "learning_rate": 6.472471190781049e-05, + "loss": 0.8511, + "step": 4228 + }, + { + "epoch": 3.38, + "learning_rate": 6.469270166453265e-05, + "loss": 0.7063, + "step": 4229 + }, + { + "epoch": 3.38, + "learning_rate": 6.46606914212548e-05, + "loss": 0.7723, + "step": 4230 + }, + { + "epoch": 3.38, + "learning_rate": 6.462868117797696e-05, + "loss": 0.6786, + "step": 4231 + }, + { + "epoch": 3.39, + "learning_rate": 6.45966709346991e-05, + "loss": 0.6675, + "step": 4232 + }, + { + "epoch": 3.39, + "learning_rate": 6.456466069142125e-05, + "loss": 0.7688, + "step": 4233 + }, + { + "epoch": 3.39, + "learning_rate": 6.453265044814341e-05, + "loss": 0.5662, + "step": 4234 + }, + { + "epoch": 3.39, + "learning_rate": 6.450064020486556e-05, + "loss": 0.6406, + "step": 4235 + }, + { + "epoch": 3.39, + "learning_rate": 6.446862996158772e-05, + "loss": 0.6893, + "step": 4236 + }, + { + "epoch": 3.39, + "learning_rate": 6.443661971830986e-05, + "loss": 0.6588, + "step": 4237 + }, + { + "epoch": 3.39, + "learning_rate": 6.440460947503202e-05, + "loss": 0.7816, + "step": 4238 + }, + { + "epoch": 3.39, + "learning_rate": 6.437259923175416e-05, + "loss": 0.8686, + "step": 4239 + }, + { + "epoch": 3.39, + "learning_rate": 6.434058898847632e-05, + "loss": 0.5776, + "step": 4240 + }, + { + "epoch": 3.39, + "learning_rate": 6.430857874519846e-05, + "loss": 0.6695, + "step": 4241 + }, + { + "epoch": 3.39, + "learning_rate": 6.427656850192062e-05, + "loss": 0.7307, + "step": 4242 + }, + { + "epoch": 3.39, + "learning_rate": 6.424455825864277e-05, + "loss": 0.787, + "step": 4243 + }, + { + "epoch": 3.4, + "learning_rate": 6.421254801536491e-05, + "loss": 0.7329, + "step": 4244 + }, + { + "epoch": 3.4, + "learning_rate": 6.418053777208707e-05, + "loss": 0.8, + "step": 4245 + }, + { + "epoch": 3.4, + "learning_rate": 6.414852752880922e-05, + "loss": 0.6676, + "step": 4246 + }, + { + "epoch": 3.4, + "learning_rate": 6.411651728553138e-05, + "loss": 0.6894, + "step": 4247 + }, + { + "epoch": 3.4, + "learning_rate": 6.408450704225353e-05, + "loss": 0.6839, + "step": 4248 + }, + { + "epoch": 3.4, + "learning_rate": 6.405249679897569e-05, + "loss": 0.6021, + "step": 4249 + }, + { + "epoch": 3.4, + "learning_rate": 6.402048655569782e-05, + "loss": 0.6692, + "step": 4250 + }, + { + "epoch": 3.4, + "learning_rate": 6.398847631241998e-05, + "loss": 0.7682, + "step": 4251 + }, + { + "epoch": 3.4, + "learning_rate": 6.395646606914212e-05, + "loss": 0.66, + "step": 4252 + }, + { + "epoch": 3.4, + "learning_rate": 6.392445582586428e-05, + "loss": 0.5609, + "step": 4253 + }, + { + "epoch": 3.4, + "learning_rate": 6.389244558258643e-05, + "loss": 0.795, + "step": 4254 + }, + { + "epoch": 3.4, + "learning_rate": 6.386043533930858e-05, + "loss": 0.6564, + "step": 4255 + }, + { + "epoch": 3.4, + "learning_rate": 6.382842509603074e-05, + "loss": 0.5752, + "step": 4256 + }, + { + "epoch": 3.41, + "learning_rate": 6.379641485275288e-05, + "loss": 0.6477, + "step": 4257 + }, + { + "epoch": 3.41, + "learning_rate": 6.376440460947504e-05, + "loss": 0.7006, + "step": 4258 + }, + { + "epoch": 3.41, + "learning_rate": 6.373239436619719e-05, + "loss": 0.7159, + "step": 4259 + }, + { + "epoch": 3.41, + "learning_rate": 6.370038412291933e-05, + "loss": 0.7437, + "step": 4260 + }, + { + "epoch": 3.41, + "learning_rate": 6.366837387964148e-05, + "loss": 0.7208, + "step": 4261 + }, + { + "epoch": 3.41, + "learning_rate": 6.363636363636364e-05, + "loss": 0.7812, + "step": 4262 + }, + { + "epoch": 3.41, + "learning_rate": 6.360435339308579e-05, + "loss": 0.6444, + "step": 4263 + }, + { + "epoch": 3.41, + "learning_rate": 6.357234314980795e-05, + "loss": 0.7154, + "step": 4264 + }, + { + "epoch": 3.41, + "learning_rate": 6.354033290653009e-05, + "loss": 0.6947, + "step": 4265 + }, + { + "epoch": 3.41, + "learning_rate": 6.350832266325224e-05, + "loss": 0.5887, + "step": 4266 + }, + { + "epoch": 3.41, + "learning_rate": 6.34763124199744e-05, + "loss": 0.7377, + "step": 4267 + }, + { + "epoch": 3.41, + "learning_rate": 6.344430217669655e-05, + "loss": 0.7933, + "step": 4268 + }, + { + "epoch": 3.42, + "learning_rate": 6.34122919334187e-05, + "loss": 0.6699, + "step": 4269 + }, + { + "epoch": 3.42, + "learning_rate": 6.338028169014085e-05, + "loss": 0.7103, + "step": 4270 + }, + { + "epoch": 3.42, + "learning_rate": 6.3348271446863e-05, + "loss": 0.7648, + "step": 4271 + }, + { + "epoch": 3.42, + "learning_rate": 6.331626120358514e-05, + "loss": 0.7612, + "step": 4272 + }, + { + "epoch": 3.42, + "learning_rate": 6.32842509603073e-05, + "loss": 0.6397, + "step": 4273 + }, + { + "epoch": 3.42, + "learning_rate": 6.325224071702945e-05, + "loss": 0.6972, + "step": 4274 + }, + { + "epoch": 3.42, + "learning_rate": 6.322023047375161e-05, + "loss": 0.6618, + "step": 4275 + }, + { + "epoch": 3.42, + "learning_rate": 6.318822023047376e-05, + "loss": 0.6795, + "step": 4276 + }, + { + "epoch": 3.42, + "learning_rate": 6.31562099871959e-05, + "loss": 0.6572, + "step": 4277 + }, + { + "epoch": 3.42, + "learning_rate": 6.312419974391806e-05, + "loss": 0.6309, + "step": 4278 + }, + { + "epoch": 3.42, + "learning_rate": 6.309218950064021e-05, + "loss": 0.7571, + "step": 4279 + }, + { + "epoch": 3.42, + "learning_rate": 6.306017925736237e-05, + "loss": 0.6433, + "step": 4280 + }, + { + "epoch": 3.42, + "learning_rate": 6.302816901408451e-05, + "loss": 0.8364, + "step": 4281 + }, + { + "epoch": 3.43, + "learning_rate": 6.299615877080666e-05, + "loss": 0.7657, + "step": 4282 + }, + { + "epoch": 3.43, + "learning_rate": 6.29641485275288e-05, + "loss": 0.6094, + "step": 4283 + }, + { + "epoch": 3.43, + "learning_rate": 6.293213828425097e-05, + "loss": 0.7642, + "step": 4284 + }, + { + "epoch": 3.43, + "learning_rate": 6.290012804097311e-05, + "loss": 0.8367, + "step": 4285 + }, + { + "epoch": 3.43, + "learning_rate": 6.286811779769527e-05, + "loss": 0.6295, + "step": 4286 + }, + { + "epoch": 3.43, + "learning_rate": 6.28361075544174e-05, + "loss": 0.654, + "step": 4287 + }, + { + "epoch": 3.43, + "learning_rate": 6.280409731113956e-05, + "loss": 0.6797, + "step": 4288 + }, + { + "epoch": 3.43, + "learning_rate": 6.277208706786172e-05, + "loss": 0.6715, + "step": 4289 + }, + { + "epoch": 3.43, + "learning_rate": 6.274007682458387e-05, + "loss": 0.7307, + "step": 4290 + }, + { + "epoch": 3.43, + "learning_rate": 6.270806658130603e-05, + "loss": 0.7332, + "step": 4291 + }, + { + "epoch": 3.43, + "learning_rate": 6.267605633802818e-05, + "loss": 0.6255, + "step": 4292 + }, + { + "epoch": 3.43, + "learning_rate": 6.264404609475032e-05, + "loss": 0.6085, + "step": 4293 + }, + { + "epoch": 3.44, + "learning_rate": 6.261203585147247e-05, + "loss": 0.6571, + "step": 4294 + }, + { + "epoch": 3.44, + "learning_rate": 6.258002560819463e-05, + "loss": 0.6628, + "step": 4295 + }, + { + "epoch": 3.44, + "learning_rate": 6.254801536491678e-05, + "loss": 0.6376, + "step": 4296 + }, + { + "epoch": 3.44, + "learning_rate": 6.251600512163893e-05, + "loss": 0.7457, + "step": 4297 + }, + { + "epoch": 3.44, + "learning_rate": 6.248399487836107e-05, + "loss": 0.6695, + "step": 4298 + }, + { + "epoch": 3.44, + "learning_rate": 6.245198463508323e-05, + "loss": 0.6622, + "step": 4299 + }, + { + "epoch": 3.44, + "learning_rate": 6.241997439180537e-05, + "loss": 0.7241, + "step": 4300 + }, + { + "epoch": 3.44, + "learning_rate": 6.238796414852753e-05, + "loss": 0.7057, + "step": 4301 + }, + { + "epoch": 3.44, + "learning_rate": 6.23559539052497e-05, + "loss": 0.6475, + "step": 4302 + }, + { + "epoch": 3.44, + "learning_rate": 6.232394366197183e-05, + "loss": 0.6679, + "step": 4303 + }, + { + "epoch": 3.44, + "learning_rate": 6.229193341869399e-05, + "loss": 0.7028, + "step": 4304 + }, + { + "epoch": 3.44, + "learning_rate": 6.225992317541613e-05, + "loss": 0.6229, + "step": 4305 + }, + { + "epoch": 3.44, + "learning_rate": 6.222791293213829e-05, + "loss": 0.5842, + "step": 4306 + }, + { + "epoch": 3.45, + "learning_rate": 6.219590268886044e-05, + "loss": 0.7773, + "step": 4307 + }, + { + "epoch": 3.45, + "learning_rate": 6.21638924455826e-05, + "loss": 0.8217, + "step": 4308 + }, + { + "epoch": 3.45, + "learning_rate": 6.213188220230473e-05, + "loss": 0.6783, + "step": 4309 + }, + { + "epoch": 3.45, + "learning_rate": 6.209987195902689e-05, + "loss": 0.6717, + "step": 4310 + }, + { + "epoch": 3.45, + "learning_rate": 6.206786171574904e-05, + "loss": 0.7146, + "step": 4311 + }, + { + "epoch": 3.45, + "learning_rate": 6.20358514724712e-05, + "loss": 0.6831, + "step": 4312 + }, + { + "epoch": 3.45, + "learning_rate": 6.200384122919334e-05, + "loss": 0.6629, + "step": 4313 + }, + { + "epoch": 3.45, + "learning_rate": 6.197183098591549e-05, + "loss": 0.5963, + "step": 4314 + }, + { + "epoch": 3.45, + "learning_rate": 6.193982074263765e-05, + "loss": 0.7725, + "step": 4315 + }, + { + "epoch": 3.45, + "learning_rate": 6.19078104993598e-05, + "loss": 0.6398, + "step": 4316 + }, + { + "epoch": 3.45, + "learning_rate": 6.187580025608195e-05, + "loss": 0.825, + "step": 4317 + }, + { + "epoch": 3.45, + "learning_rate": 6.18437900128041e-05, + "loss": 0.7755, + "step": 4318 + }, + { + "epoch": 3.46, + "learning_rate": 6.181177976952626e-05, + "loss": 0.6861, + "step": 4319 + }, + { + "epoch": 3.46, + "learning_rate": 6.177976952624839e-05, + "loss": 0.763, + "step": 4320 + }, + { + "epoch": 3.46, + "learning_rate": 6.174775928297055e-05, + "loss": 0.6056, + "step": 4321 + }, + { + "epoch": 3.46, + "learning_rate": 6.17157490396927e-05, + "loss": 0.7769, + "step": 4322 + }, + { + "epoch": 3.46, + "learning_rate": 6.168373879641486e-05, + "loss": 0.712, + "step": 4323 + }, + { + "epoch": 3.46, + "learning_rate": 6.1651728553137e-05, + "loss": 0.764, + "step": 4324 + }, + { + "epoch": 3.46, + "learning_rate": 6.161971830985915e-05, + "loss": 0.6934, + "step": 4325 + }, + { + "epoch": 3.46, + "learning_rate": 6.158770806658131e-05, + "loss": 0.8549, + "step": 4326 + }, + { + "epoch": 3.46, + "learning_rate": 6.155569782330346e-05, + "loss": 0.6893, + "step": 4327 + }, + { + "epoch": 3.46, + "learning_rate": 6.152368758002562e-05, + "loss": 0.652, + "step": 4328 + }, + { + "epoch": 3.46, + "learning_rate": 6.149167733674776e-05, + "loss": 0.6919, + "step": 4329 + }, + { + "epoch": 3.46, + "learning_rate": 6.145966709346991e-05, + "loss": 0.7116, + "step": 4330 + }, + { + "epoch": 3.46, + "learning_rate": 6.142765685019206e-05, + "loss": 0.5749, + "step": 4331 + }, + { + "epoch": 3.47, + "learning_rate": 6.139564660691422e-05, + "loss": 0.6806, + "step": 4332 + }, + { + "epoch": 3.47, + "learning_rate": 6.136363636363636e-05, + "loss": 0.7629, + "step": 4333 + }, + { + "epoch": 3.47, + "learning_rate": 6.133162612035852e-05, + "loss": 0.6681, + "step": 4334 + }, + { + "epoch": 3.47, + "learning_rate": 6.129961587708067e-05, + "loss": 0.6679, + "step": 4335 + }, + { + "epoch": 3.47, + "learning_rate": 6.126760563380281e-05, + "loss": 0.7106, + "step": 4336 + }, + { + "epoch": 3.47, + "learning_rate": 6.123559539052497e-05, + "loss": 0.6676, + "step": 4337 + }, + { + "epoch": 3.47, + "learning_rate": 6.120358514724712e-05, + "loss": 0.7281, + "step": 4338 + }, + { + "epoch": 3.47, + "learning_rate": 6.117157490396928e-05, + "loss": 0.6987, + "step": 4339 + }, + { + "epoch": 3.47, + "learning_rate": 6.113956466069143e-05, + "loss": 0.7291, + "step": 4340 + }, + { + "epoch": 3.47, + "learning_rate": 6.110755441741357e-05, + "loss": 0.8109, + "step": 4341 + }, + { + "epoch": 3.47, + "learning_rate": 6.107554417413572e-05, + "loss": 0.7585, + "step": 4342 + }, + { + "epoch": 3.47, + "learning_rate": 6.104353393085788e-05, + "loss": 0.8332, + "step": 4343 + }, + { + "epoch": 3.48, + "learning_rate": 6.101152368758003e-05, + "loss": 0.7058, + "step": 4344 + }, + { + "epoch": 3.48, + "learning_rate": 6.0979513444302185e-05, + "loss": 0.744, + "step": 4345 + }, + { + "epoch": 3.48, + "learning_rate": 6.0947503201024324e-05, + "loss": 0.7556, + "step": 4346 + }, + { + "epoch": 3.48, + "learning_rate": 6.091549295774648e-05, + "loss": 0.7647, + "step": 4347 + }, + { + "epoch": 3.48, + "learning_rate": 6.088348271446863e-05, + "loss": 0.74, + "step": 4348 + }, + { + "epoch": 3.48, + "learning_rate": 6.085147247119078e-05, + "loss": 0.7043, + "step": 4349 + }, + { + "epoch": 3.48, + "learning_rate": 6.0819462227912936e-05, + "loss": 0.7566, + "step": 4350 + }, + { + "epoch": 3.48, + "learning_rate": 6.078745198463509e-05, + "loss": 0.7474, + "step": 4351 + }, + { + "epoch": 3.48, + "learning_rate": 6.0755441741357235e-05, + "loss": 0.687, + "step": 4352 + }, + { + "epoch": 3.48, + "learning_rate": 6.072343149807939e-05, + "loss": 0.6193, + "step": 4353 + }, + { + "epoch": 3.48, + "learning_rate": 6.069142125480154e-05, + "loss": 0.644, + "step": 4354 + }, + { + "epoch": 3.48, + "learning_rate": 6.0659411011523694e-05, + "loss": 0.7766, + "step": 4355 + }, + { + "epoch": 3.48, + "learning_rate": 6.062740076824585e-05, + "loss": 0.7196, + "step": 4356 + }, + { + "epoch": 3.49, + "learning_rate": 6.059539052496799e-05, + "loss": 0.6246, + "step": 4357 + }, + { + "epoch": 3.49, + "learning_rate": 6.056338028169014e-05, + "loss": 0.7459, + "step": 4358 + }, + { + "epoch": 3.49, + "learning_rate": 6.053137003841229e-05, + "loss": 0.7069, + "step": 4359 + }, + { + "epoch": 3.49, + "learning_rate": 6.0499359795134446e-05, + "loss": 0.8074, + "step": 4360 + }, + { + "epoch": 3.49, + "learning_rate": 6.04673495518566e-05, + "loss": 0.7689, + "step": 4361 + }, + { + "epoch": 3.49, + "learning_rate": 6.043533930857875e-05, + "loss": 0.695, + "step": 4362 + }, + { + "epoch": 3.49, + "learning_rate": 6.04033290653009e-05, + "loss": 0.7715, + "step": 4363 + }, + { + "epoch": 3.49, + "learning_rate": 6.037131882202305e-05, + "loss": 0.6939, + "step": 4364 + }, + { + "epoch": 3.49, + "learning_rate": 6.0339308578745204e-05, + "loss": 0.7074, + "step": 4365 + }, + { + "epoch": 3.49, + "learning_rate": 6.030729833546736e-05, + "loss": 0.7525, + "step": 4366 + }, + { + "epoch": 3.49, + "learning_rate": 6.027528809218951e-05, + "loss": 0.7212, + "step": 4367 + }, + { + "epoch": 3.49, + "learning_rate": 6.024327784891165e-05, + "loss": 0.6988, + "step": 4368 + }, + { + "epoch": 3.5, + "learning_rate": 6.02112676056338e-05, + "loss": 0.5779, + "step": 4369 + }, + { + "epoch": 3.5, + "learning_rate": 6.0179257362355956e-05, + "loss": 0.7394, + "step": 4370 + }, + { + "epoch": 3.5, + "learning_rate": 6.014724711907811e-05, + "loss": 0.6771, + "step": 4371 + }, + { + "epoch": 3.5, + "learning_rate": 6.011523687580026e-05, + "loss": 0.7383, + "step": 4372 + }, + { + "epoch": 3.5, + "learning_rate": 6.008322663252241e-05, + "loss": 0.688, + "step": 4373 + }, + { + "epoch": 3.5, + "learning_rate": 6.005121638924456e-05, + "loss": 0.7324, + "step": 4374 + }, + { + "epoch": 3.5, + "learning_rate": 6.0019206145966714e-05, + "loss": 0.7246, + "step": 4375 + }, + { + "epoch": 3.5, + "learning_rate": 5.998719590268887e-05, + "loss": 0.6393, + "step": 4376 + }, + { + "epoch": 3.5, + "learning_rate": 5.995518565941102e-05, + "loss": 0.6148, + "step": 4377 + }, + { + "epoch": 3.5, + "learning_rate": 5.992317541613317e-05, + "loss": 0.7735, + "step": 4378 + }, + { + "epoch": 3.5, + "learning_rate": 5.989116517285531e-05, + "loss": 0.6065, + "step": 4379 + }, + { + "epoch": 3.5, + "learning_rate": 5.9859154929577465e-05, + "loss": 0.826, + "step": 4380 + }, + { + "epoch": 3.5, + "learning_rate": 5.982714468629962e-05, + "loss": 0.7072, + "step": 4381 + }, + { + "epoch": 3.51, + "learning_rate": 5.979513444302177e-05, + "loss": 0.8663, + "step": 4382 + }, + { + "epoch": 3.51, + "learning_rate": 5.9763124199743924e-05, + "loss": 0.757, + "step": 4383 + }, + { + "epoch": 3.51, + "learning_rate": 5.9731113956466064e-05, + "loss": 0.7774, + "step": 4384 + }, + { + "epoch": 3.51, + "learning_rate": 5.9699103713188224e-05, + "loss": 0.6458, + "step": 4385 + }, + { + "epoch": 3.51, + "learning_rate": 5.9667093469910377e-05, + "loss": 0.7119, + "step": 4386 + }, + { + "epoch": 3.51, + "learning_rate": 5.963508322663253e-05, + "loss": 0.7223, + "step": 4387 + }, + { + "epoch": 3.51, + "learning_rate": 5.960307298335468e-05, + "loss": 0.6482, + "step": 4388 + }, + { + "epoch": 3.51, + "learning_rate": 5.9571062740076836e-05, + "loss": 0.6542, + "step": 4389 + }, + { + "epoch": 3.51, + "learning_rate": 5.9539052496798975e-05, + "loss": 0.7969, + "step": 4390 + }, + { + "epoch": 3.51, + "learning_rate": 5.950704225352113e-05, + "loss": 0.6902, + "step": 4391 + }, + { + "epoch": 3.51, + "learning_rate": 5.947503201024328e-05, + "loss": 0.7135, + "step": 4392 + }, + { + "epoch": 3.51, + "learning_rate": 5.9443021766965434e-05, + "loss": 0.6631, + "step": 4393 + }, + { + "epoch": 3.52, + "learning_rate": 5.941101152368759e-05, + "loss": 0.7771, + "step": 4394 + }, + { + "epoch": 3.52, + "learning_rate": 5.9379001280409727e-05, + "loss": 0.673, + "step": 4395 + }, + { + "epoch": 3.52, + "learning_rate": 5.934699103713188e-05, + "loss": 0.7324, + "step": 4396 + }, + { + "epoch": 3.52, + "learning_rate": 5.931498079385403e-05, + "loss": 0.5876, + "step": 4397 + }, + { + "epoch": 3.52, + "learning_rate": 5.928297055057619e-05, + "loss": 0.7206, + "step": 4398 + }, + { + "epoch": 3.52, + "learning_rate": 5.9250960307298345e-05, + "loss": 0.6627, + "step": 4399 + }, + { + "epoch": 3.52, + "learning_rate": 5.9218950064020485e-05, + "loss": 0.7185, + "step": 4400 + }, + { + "epoch": 3.52, + "learning_rate": 5.918693982074264e-05, + "loss": 0.6543, + "step": 4401 + }, + { + "epoch": 3.52, + "learning_rate": 5.915492957746479e-05, + "loss": 0.7285, + "step": 4402 + }, + { + "epoch": 3.52, + "learning_rate": 5.9122919334186944e-05, + "loss": 0.588, + "step": 4403 + }, + { + "epoch": 3.52, + "learning_rate": 5.90909090909091e-05, + "loss": 0.6729, + "step": 4404 + }, + { + "epoch": 3.52, + "learning_rate": 5.905889884763125e-05, + "loss": 0.8435, + "step": 4405 + }, + { + "epoch": 3.52, + "learning_rate": 5.902688860435339e-05, + "loss": 0.612, + "step": 4406 + }, + { + "epoch": 3.53, + "learning_rate": 5.899487836107554e-05, + "loss": 0.7692, + "step": 4407 + }, + { + "epoch": 3.53, + "learning_rate": 5.8962868117797695e-05, + "loss": 0.691, + "step": 4408 + }, + { + "epoch": 3.53, + "learning_rate": 5.893085787451985e-05, + "loss": 0.6881, + "step": 4409 + }, + { + "epoch": 3.53, + "learning_rate": 5.8898847631242e-05, + "loss": 0.7316, + "step": 4410 + }, + { + "epoch": 3.53, + "learning_rate": 5.886683738796415e-05, + "loss": 0.6368, + "step": 4411 + }, + { + "epoch": 3.53, + "learning_rate": 5.88348271446863e-05, + "loss": 0.717, + "step": 4412 + }, + { + "epoch": 3.53, + "learning_rate": 5.8802816901408454e-05, + "loss": 0.7198, + "step": 4413 + }, + { + "epoch": 3.53, + "learning_rate": 5.8770806658130607e-05, + "loss": 0.6673, + "step": 4414 + }, + { + "epoch": 3.53, + "learning_rate": 5.873879641485276e-05, + "loss": 0.5749, + "step": 4415 + }, + { + "epoch": 3.53, + "learning_rate": 5.87067861715749e-05, + "loss": 0.6923, + "step": 4416 + }, + { + "epoch": 3.53, + "learning_rate": 5.867477592829705e-05, + "loss": 0.7641, + "step": 4417 + }, + { + "epoch": 3.53, + "learning_rate": 5.8642765685019205e-05, + "loss": 0.7041, + "step": 4418 + }, + { + "epoch": 3.54, + "learning_rate": 5.861075544174136e-05, + "loss": 0.7096, + "step": 4419 + }, + { + "epoch": 3.54, + "learning_rate": 5.857874519846351e-05, + "loss": 0.6236, + "step": 4420 + }, + { + "epoch": 3.54, + "learning_rate": 5.8546734955185664e-05, + "loss": 0.6812, + "step": 4421 + }, + { + "epoch": 3.54, + "learning_rate": 5.851472471190781e-05, + "loss": 0.6029, + "step": 4422 + }, + { + "epoch": 3.54, + "learning_rate": 5.848271446862996e-05, + "loss": 0.6844, + "step": 4423 + }, + { + "epoch": 3.54, + "learning_rate": 5.8450704225352116e-05, + "loss": 0.6449, + "step": 4424 + }, + { + "epoch": 3.54, + "learning_rate": 5.841869398207427e-05, + "loss": 0.6183, + "step": 4425 + }, + { + "epoch": 3.54, + "learning_rate": 5.838668373879642e-05, + "loss": 0.7123, + "step": 4426 + }, + { + "epoch": 3.54, + "learning_rate": 5.835467349551856e-05, + "loss": 0.6372, + "step": 4427 + }, + { + "epoch": 3.54, + "learning_rate": 5.8322663252240715e-05, + "loss": 0.6719, + "step": 4428 + }, + { + "epoch": 3.54, + "learning_rate": 5.829065300896287e-05, + "loss": 0.7343, + "step": 4429 + }, + { + "epoch": 3.54, + "learning_rate": 5.825864276568502e-05, + "loss": 0.726, + "step": 4430 + }, + { + "epoch": 3.54, + "learning_rate": 5.8226632522407174e-05, + "loss": 0.6236, + "step": 4431 + }, + { + "epoch": 3.55, + "learning_rate": 5.819462227912933e-05, + "loss": 0.7199, + "step": 4432 + }, + { + "epoch": 3.55, + "learning_rate": 5.816261203585147e-05, + "loss": 0.7615, + "step": 4433 + }, + { + "epoch": 3.55, + "learning_rate": 5.8130601792573626e-05, + "loss": 0.6132, + "step": 4434 + }, + { + "epoch": 3.55, + "learning_rate": 5.809859154929578e-05, + "loss": 0.6717, + "step": 4435 + }, + { + "epoch": 3.55, + "learning_rate": 5.806658130601793e-05, + "loss": 0.7703, + "step": 4436 + }, + { + "epoch": 3.55, + "learning_rate": 5.8034571062740085e-05, + "loss": 0.6671, + "step": 4437 + }, + { + "epoch": 3.55, + "learning_rate": 5.8002560819462225e-05, + "loss": 0.6972, + "step": 4438 + }, + { + "epoch": 3.55, + "learning_rate": 5.797055057618438e-05, + "loss": 0.7542, + "step": 4439 + }, + { + "epoch": 3.55, + "learning_rate": 5.793854033290653e-05, + "loss": 0.77, + "step": 4440 + }, + { + "epoch": 3.55, + "learning_rate": 5.7906530089628684e-05, + "loss": 0.7279, + "step": 4441 + }, + { + "epoch": 3.55, + "learning_rate": 5.7874519846350837e-05, + "loss": 0.6639, + "step": 4442 + }, + { + "epoch": 3.55, + "learning_rate": 5.784250960307298e-05, + "loss": 0.6736, + "step": 4443 + }, + { + "epoch": 3.56, + "learning_rate": 5.7810499359795136e-05, + "loss": 0.7558, + "step": 4444 + }, + { + "epoch": 3.56, + "learning_rate": 5.777848911651729e-05, + "loss": 0.7892, + "step": 4445 + }, + { + "epoch": 3.56, + "learning_rate": 5.774647887323944e-05, + "loss": 0.7413, + "step": 4446 + }, + { + "epoch": 3.56, + "learning_rate": 5.7714468629961595e-05, + "loss": 0.674, + "step": 4447 + }, + { + "epoch": 3.56, + "learning_rate": 5.768245838668375e-05, + "loss": 0.688, + "step": 4448 + }, + { + "epoch": 3.56, + "learning_rate": 5.765044814340589e-05, + "loss": 0.6525, + "step": 4449 + }, + { + "epoch": 3.56, + "learning_rate": 5.761843790012804e-05, + "loss": 0.734, + "step": 4450 + }, + { + "epoch": 3.56, + "learning_rate": 5.758642765685019e-05, + "loss": 0.6769, + "step": 4451 + }, + { + "epoch": 3.56, + "learning_rate": 5.7554417413572346e-05, + "loss": 0.7794, + "step": 4452 + }, + { + "epoch": 3.56, + "learning_rate": 5.75224071702945e-05, + "loss": 0.6997, + "step": 4453 + }, + { + "epoch": 3.56, + "learning_rate": 5.7490396927016646e-05, + "loss": 0.6943, + "step": 4454 + }, + { + "epoch": 3.56, + "learning_rate": 5.74583866837388e-05, + "loss": 0.6885, + "step": 4455 + }, + { + "epoch": 3.56, + "learning_rate": 5.742637644046095e-05, + "loss": 0.7245, + "step": 4456 + }, + { + "epoch": 3.57, + "learning_rate": 5.7394366197183105e-05, + "loss": 0.7294, + "step": 4457 + }, + { + "epoch": 3.57, + "learning_rate": 5.736235595390526e-05, + "loss": 0.7344, + "step": 4458 + }, + { + "epoch": 3.57, + "learning_rate": 5.733034571062741e-05, + "loss": 0.5778, + "step": 4459 + }, + { + "epoch": 3.57, + "learning_rate": 5.729833546734955e-05, + "loss": 0.6848, + "step": 4460 + }, + { + "epoch": 3.57, + "learning_rate": 5.72663252240717e-05, + "loss": 0.6988, + "step": 4461 + }, + { + "epoch": 3.57, + "learning_rate": 5.7234314980793856e-05, + "loss": 0.65, + "step": 4462 + }, + { + "epoch": 3.57, + "learning_rate": 5.720230473751601e-05, + "loss": 0.714, + "step": 4463 + }, + { + "epoch": 3.57, + "learning_rate": 5.717029449423816e-05, + "loss": 0.7178, + "step": 4464 + }, + { + "epoch": 3.57, + "learning_rate": 5.713828425096031e-05, + "loss": 0.6363, + "step": 4465 + }, + { + "epoch": 3.57, + "learning_rate": 5.710627400768246e-05, + "loss": 0.7004, + "step": 4466 + }, + { + "epoch": 3.57, + "learning_rate": 5.7074263764404614e-05, + "loss": 0.6963, + "step": 4467 + }, + { + "epoch": 3.57, + "learning_rate": 5.704225352112677e-05, + "loss": 0.7882, + "step": 4468 + }, + { + "epoch": 3.58, + "learning_rate": 5.701024327784892e-05, + "loss": 0.6559, + "step": 4469 + }, + { + "epoch": 3.58, + "learning_rate": 5.697823303457106e-05, + "loss": 0.752, + "step": 4470 + }, + { + "epoch": 3.58, + "learning_rate": 5.694622279129321e-05, + "loss": 0.6198, + "step": 4471 + }, + { + "epoch": 3.58, + "learning_rate": 5.6914212548015366e-05, + "loss": 0.6761, + "step": 4472 + }, + { + "epoch": 3.58, + "learning_rate": 5.688220230473752e-05, + "loss": 0.7993, + "step": 4473 + }, + { + "epoch": 3.58, + "learning_rate": 5.685019206145967e-05, + "loss": 0.7153, + "step": 4474 + }, + { + "epoch": 3.58, + "learning_rate": 5.6818181818181825e-05, + "loss": 0.6228, + "step": 4475 + }, + { + "epoch": 3.58, + "learning_rate": 5.678617157490397e-05, + "loss": 0.7156, + "step": 4476 + }, + { + "epoch": 3.58, + "learning_rate": 5.6754161331626124e-05, + "loss": 0.6742, + "step": 4477 + }, + { + "epoch": 3.58, + "learning_rate": 5.672215108834828e-05, + "loss": 0.7513, + "step": 4478 + }, + { + "epoch": 3.58, + "learning_rate": 5.669014084507043e-05, + "loss": 0.6541, + "step": 4479 + }, + { + "epoch": 3.58, + "learning_rate": 5.665813060179258e-05, + "loss": 0.7303, + "step": 4480 + }, + { + "epoch": 3.58, + "learning_rate": 5.662612035851472e-05, + "loss": 0.6607, + "step": 4481 + }, + { + "epoch": 3.59, + "learning_rate": 5.6594110115236876e-05, + "loss": 0.7702, + "step": 4482 + }, + { + "epoch": 3.59, + "learning_rate": 5.656209987195903e-05, + "loss": 0.7051, + "step": 4483 + }, + { + "epoch": 3.59, + "learning_rate": 5.653008962868118e-05, + "loss": 0.6601, + "step": 4484 + }, + { + "epoch": 3.59, + "learning_rate": 5.6498079385403335e-05, + "loss": 0.8148, + "step": 4485 + }, + { + "epoch": 3.59, + "learning_rate": 5.646606914212549e-05, + "loss": 0.7127, + "step": 4486 + }, + { + "epoch": 3.59, + "learning_rate": 5.643405889884763e-05, + "loss": 0.7085, + "step": 4487 + }, + { + "epoch": 3.59, + "learning_rate": 5.640204865556979e-05, + "loss": 0.6758, + "step": 4488 + }, + { + "epoch": 3.59, + "learning_rate": 5.637003841229194e-05, + "loss": 0.7578, + "step": 4489 + }, + { + "epoch": 3.59, + "learning_rate": 5.633802816901409e-05, + "loss": 0.6775, + "step": 4490 + }, + { + "epoch": 3.59, + "learning_rate": 5.6306017925736246e-05, + "loss": 0.6774, + "step": 4491 + }, + { + "epoch": 3.59, + "learning_rate": 5.6274007682458385e-05, + "loss": 0.6557, + "step": 4492 + }, + { + "epoch": 3.59, + "learning_rate": 5.624199743918054e-05, + "loss": 0.611, + "step": 4493 + }, + { + "epoch": 3.6, + "learning_rate": 5.620998719590269e-05, + "loss": 0.6374, + "step": 4494 + }, + { + "epoch": 3.6, + "learning_rate": 5.6177976952624844e-05, + "loss": 0.7335, + "step": 4495 + }, + { + "epoch": 3.6, + "learning_rate": 5.6145966709347e-05, + "loss": 0.7986, + "step": 4496 + }, + { + "epoch": 3.6, + "learning_rate": 5.611395646606914e-05, + "loss": 0.6636, + "step": 4497 + }, + { + "epoch": 3.6, + "learning_rate": 5.608194622279129e-05, + "loss": 0.6688, + "step": 4498 + }, + { + "epoch": 3.6, + "learning_rate": 5.604993597951344e-05, + "loss": 0.776, + "step": 4499 + }, + { + "epoch": 3.6, + "learning_rate": 5.6017925736235596e-05, + "loss": 0.7217, + "step": 4500 + }, + { + "epoch": 3.6, + "learning_rate": 5.598591549295775e-05, + "loss": 0.7535, + "step": 4501 + }, + { + "epoch": 3.6, + "learning_rate": 5.595390524967991e-05, + "loss": 0.7302, + "step": 4502 + }, + { + "epoch": 3.6, + "learning_rate": 5.592189500640205e-05, + "loss": 0.6364, + "step": 4503 + }, + { + "epoch": 3.6, + "learning_rate": 5.58898847631242e-05, + "loss": 0.871, + "step": 4504 + }, + { + "epoch": 3.6, + "learning_rate": 5.5857874519846354e-05, + "loss": 0.752, + "step": 4505 + }, + { + "epoch": 3.6, + "learning_rate": 5.582586427656851e-05, + "loss": 0.6547, + "step": 4506 + }, + { + "epoch": 3.61, + "learning_rate": 5.579385403329066e-05, + "loss": 0.6955, + "step": 4507 + }, + { + "epoch": 3.61, + "learning_rate": 5.57618437900128e-05, + "loss": 0.6206, + "step": 4508 + }, + { + "epoch": 3.61, + "learning_rate": 5.572983354673495e-05, + "loss": 0.7641, + "step": 4509 + }, + { + "epoch": 3.61, + "learning_rate": 5.5697823303457106e-05, + "loss": 0.6265, + "step": 4510 + }, + { + "epoch": 3.61, + "learning_rate": 5.566581306017926e-05, + "loss": 0.7897, + "step": 4511 + }, + { + "epoch": 3.61, + "learning_rate": 5.563380281690141e-05, + "loss": 0.7344, + "step": 4512 + }, + { + "epoch": 3.61, + "learning_rate": 5.560179257362356e-05, + "loss": 0.6338, + "step": 4513 + }, + { + "epoch": 3.61, + "learning_rate": 5.556978233034571e-05, + "loss": 0.5628, + "step": 4514 + }, + { + "epoch": 3.61, + "learning_rate": 5.5537772087067864e-05, + "loss": 0.6696, + "step": 4515 + }, + { + "epoch": 3.61, + "learning_rate": 5.550576184379002e-05, + "loss": 0.7186, + "step": 4516 + }, + { + "epoch": 3.61, + "learning_rate": 5.547375160051217e-05, + "loss": 0.6831, + "step": 4517 + }, + { + "epoch": 3.61, + "learning_rate": 5.544174135723432e-05, + "loss": 0.7082, + "step": 4518 + }, + { + "epoch": 3.62, + "learning_rate": 5.540973111395646e-05, + "loss": 0.6906, + "step": 4519 + }, + { + "epoch": 3.62, + "learning_rate": 5.5377720870678615e-05, + "loss": 0.6643, + "step": 4520 + }, + { + "epoch": 3.62, + "learning_rate": 5.534571062740077e-05, + "loss": 0.6674, + "step": 4521 + }, + { + "epoch": 3.62, + "learning_rate": 5.531370038412292e-05, + "loss": 0.6604, + "step": 4522 + }, + { + "epoch": 3.62, + "learning_rate": 5.5281690140845074e-05, + "loss": 0.7554, + "step": 4523 + }, + { + "epoch": 3.62, + "learning_rate": 5.524967989756722e-05, + "loss": 0.7378, + "step": 4524 + }, + { + "epoch": 3.62, + "learning_rate": 5.5217669654289374e-05, + "loss": 0.6454, + "step": 4525 + }, + { + "epoch": 3.62, + "learning_rate": 5.518565941101153e-05, + "loss": 0.67, + "step": 4526 + }, + { + "epoch": 3.62, + "learning_rate": 5.515364916773368e-05, + "loss": 0.6865, + "step": 4527 + }, + { + "epoch": 3.62, + "learning_rate": 5.512163892445583e-05, + "loss": 0.7828, + "step": 4528 + }, + { + "epoch": 3.62, + "learning_rate": 5.5089628681177986e-05, + "loss": 0.7646, + "step": 4529 + }, + { + "epoch": 3.62, + "learning_rate": 5.5057618437900125e-05, + "loss": 0.5903, + "step": 4530 + }, + { + "epoch": 3.62, + "learning_rate": 5.502560819462228e-05, + "loss": 0.7172, + "step": 4531 + }, + { + "epoch": 3.63, + "learning_rate": 5.499359795134443e-05, + "loss": 0.7709, + "step": 4532 + }, + { + "epoch": 3.63, + "learning_rate": 5.4961587708066584e-05, + "loss": 0.7749, + "step": 4533 + }, + { + "epoch": 3.63, + "learning_rate": 5.492957746478874e-05, + "loss": 0.6878, + "step": 4534 + }, + { + "epoch": 3.63, + "learning_rate": 5.4897567221510883e-05, + "loss": 0.7425, + "step": 4535 + }, + { + "epoch": 3.63, + "learning_rate": 5.4865556978233036e-05, + "loss": 0.6696, + "step": 4536 + }, + { + "epoch": 3.63, + "learning_rate": 5.483354673495519e-05, + "loss": 0.8072, + "step": 4537 + }, + { + "epoch": 3.63, + "learning_rate": 5.480153649167734e-05, + "loss": 0.7721, + "step": 4538 + }, + { + "epoch": 3.63, + "learning_rate": 5.4769526248399495e-05, + "loss": 0.7294, + "step": 4539 + }, + { + "epoch": 3.63, + "learning_rate": 5.4737516005121635e-05, + "loss": 0.6726, + "step": 4540 + }, + { + "epoch": 3.63, + "learning_rate": 5.470550576184379e-05, + "loss": 0.7604, + "step": 4541 + }, + { + "epoch": 3.63, + "learning_rate": 5.467349551856594e-05, + "loss": 0.7294, + "step": 4542 + }, + { + "epoch": 3.63, + "learning_rate": 5.4641485275288094e-05, + "loss": 0.6283, + "step": 4543 + }, + { + "epoch": 3.64, + "learning_rate": 5.460947503201025e-05, + "loss": 0.5859, + "step": 4544 + }, + { + "epoch": 3.64, + "learning_rate": 5.45774647887324e-05, + "loss": 0.7952, + "step": 4545 + }, + { + "epoch": 3.64, + "learning_rate": 5.4545454545454546e-05, + "loss": 0.6305, + "step": 4546 + }, + { + "epoch": 3.64, + "learning_rate": 5.45134443021767e-05, + "loss": 0.6847, + "step": 4547 + }, + { + "epoch": 3.64, + "learning_rate": 5.448143405889885e-05, + "loss": 0.6314, + "step": 4548 + }, + { + "epoch": 3.64, + "learning_rate": 5.4449423815621005e-05, + "loss": 0.645, + "step": 4549 + }, + { + "epoch": 3.64, + "learning_rate": 5.441741357234316e-05, + "loss": 0.7204, + "step": 4550 + }, + { + "epoch": 3.64, + "learning_rate": 5.43854033290653e-05, + "loss": 0.7403, + "step": 4551 + }, + { + "epoch": 3.64, + "learning_rate": 5.435339308578745e-05, + "loss": 0.7754, + "step": 4552 + }, + { + "epoch": 3.64, + "learning_rate": 5.4321382842509604e-05, + "loss": 0.6294, + "step": 4553 + }, + { + "epoch": 3.64, + "learning_rate": 5.428937259923176e-05, + "loss": 0.6786, + "step": 4554 + }, + { + "epoch": 3.64, + "learning_rate": 5.425736235595391e-05, + "loss": 0.8126, + "step": 4555 + }, + { + "epoch": 3.64, + "learning_rate": 5.422535211267606e-05, + "loss": 0.629, + "step": 4556 + }, + { + "epoch": 3.65, + "learning_rate": 5.419334186939821e-05, + "loss": 0.7313, + "step": 4557 + }, + { + "epoch": 3.65, + "learning_rate": 5.416133162612036e-05, + "loss": 0.7051, + "step": 4558 + }, + { + "epoch": 3.65, + "learning_rate": 5.4129321382842515e-05, + "loss": 0.7117, + "step": 4559 + }, + { + "epoch": 3.65, + "learning_rate": 5.409731113956467e-05, + "loss": 0.7072, + "step": 4560 + }, + { + "epoch": 3.65, + "learning_rate": 5.406530089628682e-05, + "loss": 0.7881, + "step": 4561 + }, + { + "epoch": 3.65, + "learning_rate": 5.403329065300896e-05, + "loss": 0.7981, + "step": 4562 + }, + { + "epoch": 3.65, + "learning_rate": 5.4001280409731113e-05, + "loss": 0.6737, + "step": 4563 + }, + { + "epoch": 3.65, + "learning_rate": 5.3969270166453266e-05, + "loss": 0.7759, + "step": 4564 + }, + { + "epoch": 3.65, + "learning_rate": 5.393725992317542e-05, + "loss": 0.5139, + "step": 4565 + }, + { + "epoch": 3.65, + "learning_rate": 5.390524967989757e-05, + "loss": 0.6484, + "step": 4566 + }, + { + "epoch": 3.65, + "learning_rate": 5.387323943661972e-05, + "loss": 0.6612, + "step": 4567 + }, + { + "epoch": 3.65, + "learning_rate": 5.384122919334187e-05, + "loss": 0.6611, + "step": 4568 + }, + { + "epoch": 3.66, + "learning_rate": 5.3809218950064025e-05, + "loss": 0.7351, + "step": 4569 + }, + { + "epoch": 3.66, + "learning_rate": 5.377720870678618e-05, + "loss": 0.6955, + "step": 4570 + }, + { + "epoch": 3.66, + "learning_rate": 5.374519846350833e-05, + "loss": 0.8184, + "step": 4571 + }, + { + "epoch": 3.66, + "learning_rate": 5.3713188220230484e-05, + "loss": 0.5937, + "step": 4572 + }, + { + "epoch": 3.66, + "learning_rate": 5.368117797695262e-05, + "loss": 0.6761, + "step": 4573 + }, + { + "epoch": 3.66, + "learning_rate": 5.3649167733674776e-05, + "loss": 0.7019, + "step": 4574 + }, + { + "epoch": 3.66, + "learning_rate": 5.361715749039693e-05, + "loss": 0.7517, + "step": 4575 + }, + { + "epoch": 3.66, + "learning_rate": 5.358514724711908e-05, + "loss": 0.7624, + "step": 4576 + }, + { + "epoch": 3.66, + "learning_rate": 5.3553137003841235e-05, + "loss": 0.767, + "step": 4577 + }, + { + "epoch": 3.66, + "learning_rate": 5.352112676056338e-05, + "loss": 0.8186, + "step": 4578 + }, + { + "epoch": 3.66, + "learning_rate": 5.3489116517285535e-05, + "loss": 0.7373, + "step": 4579 + }, + { + "epoch": 3.66, + "learning_rate": 5.345710627400769e-05, + "loss": 0.8818, + "step": 4580 + }, + { + "epoch": 3.66, + "learning_rate": 5.342509603072984e-05, + "loss": 0.5795, + "step": 4581 + }, + { + "epoch": 3.67, + "learning_rate": 5.3393085787451994e-05, + "loss": 0.6596, + "step": 4582 + }, + { + "epoch": 3.67, + "learning_rate": 5.336107554417413e-05, + "loss": 0.6605, + "step": 4583 + }, + { + "epoch": 3.67, + "learning_rate": 5.3329065300896286e-05, + "loss": 0.6837, + "step": 4584 + }, + { + "epoch": 3.67, + "learning_rate": 5.329705505761844e-05, + "loss": 0.7729, + "step": 4585 + }, + { + "epoch": 3.67, + "learning_rate": 5.326504481434059e-05, + "loss": 0.6443, + "step": 4586 + }, + { + "epoch": 3.67, + "learning_rate": 5.3233034571062745e-05, + "loss": 0.6873, + "step": 4587 + }, + { + "epoch": 3.67, + "learning_rate": 5.32010243277849e-05, + "loss": 0.7204, + "step": 4588 + }, + { + "epoch": 3.67, + "learning_rate": 5.316901408450704e-05, + "loss": 0.662, + "step": 4589 + }, + { + "epoch": 3.67, + "learning_rate": 5.313700384122919e-05, + "loss": 0.6044, + "step": 4590 + }, + { + "epoch": 3.67, + "learning_rate": 5.3104993597951344e-05, + "loss": 0.7239, + "step": 4591 + }, + { + "epoch": 3.67, + "learning_rate": 5.30729833546735e-05, + "loss": 0.6173, + "step": 4592 + }, + { + "epoch": 3.67, + "learning_rate": 5.3040973111395656e-05, + "loss": 0.6767, + "step": 4593 + }, + { + "epoch": 3.68, + "learning_rate": 5.3008962868117796e-05, + "loss": 0.6335, + "step": 4594 + }, + { + "epoch": 3.68, + "learning_rate": 5.297695262483995e-05, + "loss": 0.646, + "step": 4595 + }, + { + "epoch": 3.68, + "learning_rate": 5.29449423815621e-05, + "loss": 0.6795, + "step": 4596 + }, + { + "epoch": 3.68, + "learning_rate": 5.2912932138284255e-05, + "loss": 0.7227, + "step": 4597 + }, + { + "epoch": 3.68, + "learning_rate": 5.288092189500641e-05, + "loss": 0.8293, + "step": 4598 + }, + { + "epoch": 3.68, + "learning_rate": 5.284891165172856e-05, + "loss": 0.8062, + "step": 4599 + }, + { + "epoch": 3.68, + "learning_rate": 5.28169014084507e-05, + "loss": 0.6663, + "step": 4600 + }, + { + "epoch": 3.68, + "learning_rate": 5.278489116517285e-05, + "loss": 0.694, + "step": 4601 + }, + { + "epoch": 3.68, + "learning_rate": 5.2752880921895006e-05, + "loss": 0.7954, + "step": 4602 + }, + { + "epoch": 3.68, + "learning_rate": 5.272087067861716e-05, + "loss": 0.7402, + "step": 4603 + }, + { + "epoch": 3.68, + "learning_rate": 5.268886043533931e-05, + "loss": 0.6941, + "step": 4604 + }, + { + "epoch": 3.68, + "learning_rate": 5.265685019206146e-05, + "loss": 0.7546, + "step": 4605 + }, + { + "epoch": 3.68, + "learning_rate": 5.262483994878361e-05, + "loss": 0.8806, + "step": 4606 + }, + { + "epoch": 3.69, + "learning_rate": 5.2592829705505765e-05, + "loss": 0.6971, + "step": 4607 + }, + { + "epoch": 3.69, + "learning_rate": 5.256081946222792e-05, + "loss": 0.6406, + "step": 4608 + }, + { + "epoch": 3.69, + "learning_rate": 5.252880921895007e-05, + "loss": 0.768, + "step": 4609 + }, + { + "epoch": 3.69, + "learning_rate": 5.249679897567221e-05, + "loss": 0.7656, + "step": 4610 + }, + { + "epoch": 3.69, + "learning_rate": 5.246478873239436e-05, + "loss": 0.6593, + "step": 4611 + }, + { + "epoch": 3.69, + "learning_rate": 5.2432778489116516e-05, + "loss": 0.6196, + "step": 4612 + }, + { + "epoch": 3.69, + "learning_rate": 5.240076824583867e-05, + "loss": 0.7465, + "step": 4613 + }, + { + "epoch": 3.69, + "learning_rate": 5.236875800256082e-05, + "loss": 0.7951, + "step": 4614 + }, + { + "epoch": 3.69, + "learning_rate": 5.2336747759282975e-05, + "loss": 0.6763, + "step": 4615 + }, + { + "epoch": 3.69, + "learning_rate": 5.230473751600512e-05, + "loss": 0.7806, + "step": 4616 + }, + { + "epoch": 3.69, + "learning_rate": 5.2272727272727274e-05, + "loss": 0.69, + "step": 4617 + }, + { + "epoch": 3.69, + "learning_rate": 5.224071702944943e-05, + "loss": 0.7296, + "step": 4618 + }, + { + "epoch": 3.7, + "learning_rate": 5.220870678617158e-05, + "loss": 0.6436, + "step": 4619 + }, + { + "epoch": 3.7, + "learning_rate": 5.217669654289373e-05, + "loss": 0.7271, + "step": 4620 + }, + { + "epoch": 3.7, + "learning_rate": 5.214468629961587e-05, + "loss": 0.6177, + "step": 4621 + }, + { + "epoch": 3.7, + "learning_rate": 5.2112676056338026e-05, + "loss": 0.7715, + "step": 4622 + }, + { + "epoch": 3.7, + "learning_rate": 5.208066581306018e-05, + "loss": 0.7322, + "step": 4623 + }, + { + "epoch": 3.7, + "learning_rate": 5.204865556978233e-05, + "loss": 0.7109, + "step": 4624 + }, + { + "epoch": 3.7, + "learning_rate": 5.2016645326504485e-05, + "loss": 0.6097, + "step": 4625 + }, + { + "epoch": 3.7, + "learning_rate": 5.198463508322664e-05, + "loss": 0.7534, + "step": 4626 + }, + { + "epoch": 3.7, + "learning_rate": 5.1952624839948784e-05, + "loss": 0.691, + "step": 4627 + }, + { + "epoch": 3.7, + "learning_rate": 5.192061459667094e-05, + "loss": 0.6339, + "step": 4628 + }, + { + "epoch": 3.7, + "learning_rate": 5.188860435339309e-05, + "loss": 0.7437, + "step": 4629 + }, + { + "epoch": 3.7, + "learning_rate": 5.185659411011524e-05, + "loss": 0.7362, + "step": 4630 + }, + { + "epoch": 3.7, + "learning_rate": 5.1824583866837396e-05, + "loss": 0.7501, + "step": 4631 + }, + { + "epoch": 3.71, + "learning_rate": 5.1792573623559536e-05, + "loss": 0.6319, + "step": 4632 + }, + { + "epoch": 3.71, + "learning_rate": 5.176056338028169e-05, + "loss": 0.5973, + "step": 4633 + }, + { + "epoch": 3.71, + "learning_rate": 5.172855313700384e-05, + "loss": 0.7697, + "step": 4634 + }, + { + "epoch": 3.71, + "learning_rate": 5.1696542893725995e-05, + "loss": 0.7166, + "step": 4635 + }, + { + "epoch": 3.71, + "learning_rate": 5.166453265044815e-05, + "loss": 0.7797, + "step": 4636 + }, + { + "epoch": 3.71, + "learning_rate": 5.1632522407170294e-05, + "loss": 0.7228, + "step": 4637 + }, + { + "epoch": 3.71, + "learning_rate": 5.160051216389245e-05, + "loss": 0.7139, + "step": 4638 + }, + { + "epoch": 3.71, + "learning_rate": 5.15685019206146e-05, + "loss": 0.6829, + "step": 4639 + }, + { + "epoch": 3.71, + "learning_rate": 5.153649167733675e-05, + "loss": 0.8862, + "step": 4640 + }, + { + "epoch": 3.71, + "learning_rate": 5.1504481434058906e-05, + "loss": 0.749, + "step": 4641 + }, + { + "epoch": 3.71, + "learning_rate": 5.147247119078106e-05, + "loss": 0.6643, + "step": 4642 + }, + { + "epoch": 3.71, + "learning_rate": 5.14404609475032e-05, + "loss": 0.6733, + "step": 4643 + }, + { + "epoch": 3.72, + "learning_rate": 5.140845070422535e-05, + "loss": 0.6581, + "step": 4644 + }, + { + "epoch": 3.72, + "learning_rate": 5.1376440460947504e-05, + "loss": 0.6859, + "step": 4645 + }, + { + "epoch": 3.72, + "learning_rate": 5.134443021766966e-05, + "loss": 0.687, + "step": 4646 + }, + { + "epoch": 3.72, + "learning_rate": 5.131241997439181e-05, + "loss": 0.8113, + "step": 4647 + }, + { + "epoch": 3.72, + "learning_rate": 5.1280409731113957e-05, + "loss": 0.748, + "step": 4648 + }, + { + "epoch": 3.72, + "learning_rate": 5.124839948783611e-05, + "loss": 0.672, + "step": 4649 + }, + { + "epoch": 3.72, + "learning_rate": 5.121638924455826e-05, + "loss": 0.7456, + "step": 4650 + }, + { + "epoch": 3.72, + "learning_rate": 5.1184379001280416e-05, + "loss": 0.7437, + "step": 4651 + }, + { + "epoch": 3.72, + "learning_rate": 5.115236875800257e-05, + "loss": 0.7205, + "step": 4652 + }, + { + "epoch": 3.72, + "learning_rate": 5.112035851472471e-05, + "loss": 0.7365, + "step": 4653 + }, + { + "epoch": 3.72, + "learning_rate": 5.108834827144686e-05, + "loss": 0.7663, + "step": 4654 + }, + { + "epoch": 3.72, + "learning_rate": 5.1056338028169014e-05, + "loss": 0.6298, + "step": 4655 + }, + { + "epoch": 3.72, + "learning_rate": 5.102432778489117e-05, + "loss": 0.6288, + "step": 4656 + }, + { + "epoch": 3.73, + "learning_rate": 5.099231754161332e-05, + "loss": 0.6648, + "step": 4657 + }, + { + "epoch": 3.73, + "learning_rate": 5.096030729833547e-05, + "loss": 0.6963, + "step": 4658 + }, + { + "epoch": 3.73, + "learning_rate": 5.092829705505762e-05, + "loss": 0.5815, + "step": 4659 + }, + { + "epoch": 3.73, + "learning_rate": 5.089628681177977e-05, + "loss": 0.7524, + "step": 4660 + }, + { + "epoch": 3.73, + "learning_rate": 5.0864276568501925e-05, + "loss": 0.7411, + "step": 4661 + }, + { + "epoch": 3.73, + "learning_rate": 5.083226632522408e-05, + "loss": 0.6453, + "step": 4662 + }, + { + "epoch": 3.73, + "learning_rate": 5.080025608194623e-05, + "loss": 0.6834, + "step": 4663 + }, + { + "epoch": 3.73, + "learning_rate": 5.076824583866837e-05, + "loss": 0.6964, + "step": 4664 + }, + { + "epoch": 3.73, + "learning_rate": 5.0736235595390524e-05, + "loss": 0.739, + "step": 4665 + }, + { + "epoch": 3.73, + "learning_rate": 5.070422535211268e-05, + "loss": 0.7338, + "step": 4666 + }, + { + "epoch": 3.73, + "learning_rate": 5.067221510883483e-05, + "loss": 0.6449, + "step": 4667 + }, + { + "epoch": 3.73, + "learning_rate": 5.064020486555698e-05, + "loss": 0.7762, + "step": 4668 + }, + { + "epoch": 3.74, + "learning_rate": 5.0608194622279136e-05, + "loss": 0.6977, + "step": 4669 + }, + { + "epoch": 3.74, + "learning_rate": 5.057618437900128e-05, + "loss": 0.7494, + "step": 4670 + }, + { + "epoch": 3.74, + "learning_rate": 5.0544174135723435e-05, + "loss": 0.7308, + "step": 4671 + }, + { + "epoch": 3.74, + "learning_rate": 5.051216389244559e-05, + "loss": 0.7815, + "step": 4672 + }, + { + "epoch": 3.74, + "learning_rate": 5.048015364916774e-05, + "loss": 0.6845, + "step": 4673 + }, + { + "epoch": 3.74, + "learning_rate": 5.0448143405889894e-05, + "loss": 0.6972, + "step": 4674 + }, + { + "epoch": 3.74, + "learning_rate": 5.0416133162612034e-05, + "loss": 0.7622, + "step": 4675 + }, + { + "epoch": 3.74, + "learning_rate": 5.038412291933419e-05, + "loss": 0.7243, + "step": 4676 + }, + { + "epoch": 3.74, + "learning_rate": 5.035211267605634e-05, + "loss": 0.7498, + "step": 4677 + }, + { + "epoch": 3.74, + "learning_rate": 5.032010243277849e-05, + "loss": 0.6695, + "step": 4678 + }, + { + "epoch": 3.74, + "learning_rate": 5.0288092189500646e-05, + "loss": 0.8248, + "step": 4679 + }, + { + "epoch": 3.74, + "learning_rate": 5.0256081946222785e-05, + "loss": 0.7516, + "step": 4680 + }, + { + "epoch": 3.74, + "learning_rate": 5.022407170294494e-05, + "loss": 0.6587, + "step": 4681 + }, + { + "epoch": 3.75, + "learning_rate": 5.01920614596671e-05, + "loss": 0.7755, + "step": 4682 + }, + { + "epoch": 3.75, + "learning_rate": 5.016005121638925e-05, + "loss": 0.6435, + "step": 4683 + }, + { + "epoch": 3.75, + "learning_rate": 5.0128040973111404e-05, + "loss": 0.7501, + "step": 4684 + }, + { + "epoch": 3.75, + "learning_rate": 5.009603072983356e-05, + "loss": 0.6473, + "step": 4685 + }, + { + "epoch": 3.75, + "learning_rate": 5.0064020486555696e-05, + "loss": 0.6043, + "step": 4686 + }, + { + "epoch": 3.75, + "learning_rate": 5.003201024327785e-05, + "loss": 0.6149, + "step": 4687 + }, + { + "epoch": 3.75, + "learning_rate": 5e-05, + "loss": 0.7631, + "step": 4688 + }, + { + "epoch": 3.75, + "learning_rate": 4.9967989756722155e-05, + "loss": 0.7342, + "step": 4689 + }, + { + "epoch": 3.75, + "learning_rate": 4.99359795134443e-05, + "loss": 0.6764, + "step": 4690 + }, + { + "epoch": 3.75, + "learning_rate": 4.9903969270166455e-05, + "loss": 0.7917, + "step": 4691 + }, + { + "epoch": 3.75, + "learning_rate": 4.987195902688861e-05, + "loss": 0.6885, + "step": 4692 + }, + { + "epoch": 3.75, + "learning_rate": 4.9839948783610754e-05, + "loss": 0.6854, + "step": 4693 + }, + { + "epoch": 3.76, + "learning_rate": 4.980793854033291e-05, + "loss": 0.822, + "step": 4694 + }, + { + "epoch": 3.76, + "learning_rate": 4.977592829705506e-05, + "loss": 0.727, + "step": 4695 + }, + { + "epoch": 3.76, + "learning_rate": 4.974391805377721e-05, + "loss": 0.7155, + "step": 4696 + }, + { + "epoch": 3.76, + "learning_rate": 4.9711907810499366e-05, + "loss": 0.6015, + "step": 4697 + }, + { + "epoch": 3.76, + "learning_rate": 4.967989756722151e-05, + "loss": 0.6175, + "step": 4698 + }, + { + "epoch": 3.76, + "learning_rate": 4.9647887323943665e-05, + "loss": 0.8339, + "step": 4699 + }, + { + "epoch": 3.76, + "learning_rate": 4.961587708066582e-05, + "loss": 0.7489, + "step": 4700 + }, + { + "epoch": 3.76, + "learning_rate": 4.9583866837387964e-05, + "loss": 0.7504, + "step": 4701 + }, + { + "epoch": 3.76, + "learning_rate": 4.955185659411012e-05, + "loss": 0.772, + "step": 4702 + }, + { + "epoch": 3.76, + "learning_rate": 4.9519846350832264e-05, + "loss": 0.6389, + "step": 4703 + }, + { + "epoch": 3.76, + "learning_rate": 4.948783610755442e-05, + "loss": 0.676, + "step": 4704 + }, + { + "epoch": 3.76, + "learning_rate": 4.945582586427657e-05, + "loss": 0.6815, + "step": 4705 + }, + { + "epoch": 3.76, + "learning_rate": 4.942381562099872e-05, + "loss": 0.6939, + "step": 4706 + }, + { + "epoch": 3.77, + "learning_rate": 4.9391805377720876e-05, + "loss": 0.7511, + "step": 4707 + }, + { + "epoch": 3.77, + "learning_rate": 4.935979513444303e-05, + "loss": 0.6534, + "step": 4708 + }, + { + "epoch": 3.77, + "learning_rate": 4.9327784891165175e-05, + "loss": 0.7317, + "step": 4709 + }, + { + "epoch": 3.77, + "learning_rate": 4.929577464788733e-05, + "loss": 0.7103, + "step": 4710 + }, + { + "epoch": 3.77, + "learning_rate": 4.9263764404609474e-05, + "loss": 0.735, + "step": 4711 + }, + { + "epoch": 3.77, + "learning_rate": 4.923175416133163e-05, + "loss": 0.7393, + "step": 4712 + }, + { + "epoch": 3.77, + "learning_rate": 4.919974391805378e-05, + "loss": 0.7038, + "step": 4713 + }, + { + "epoch": 3.77, + "learning_rate": 4.9167733674775926e-05, + "loss": 0.7441, + "step": 4714 + }, + { + "epoch": 3.77, + "learning_rate": 4.913572343149808e-05, + "loss": 0.7103, + "step": 4715 + }, + { + "epoch": 3.77, + "learning_rate": 4.910371318822023e-05, + "loss": 0.669, + "step": 4716 + }, + { + "epoch": 3.77, + "learning_rate": 4.9071702944942385e-05, + "loss": 0.7988, + "step": 4717 + }, + { + "epoch": 3.77, + "learning_rate": 4.903969270166454e-05, + "loss": 0.6224, + "step": 4718 + }, + { + "epoch": 3.78, + "learning_rate": 4.9007682458386685e-05, + "loss": 0.6805, + "step": 4719 + }, + { + "epoch": 3.78, + "learning_rate": 4.897567221510884e-05, + "loss": 0.7823, + "step": 4720 + }, + { + "epoch": 3.78, + "learning_rate": 4.894366197183099e-05, + "loss": 0.7855, + "step": 4721 + }, + { + "epoch": 3.78, + "learning_rate": 4.891165172855314e-05, + "loss": 0.6678, + "step": 4722 + }, + { + "epoch": 3.78, + "learning_rate": 4.887964148527529e-05, + "loss": 0.6468, + "step": 4723 + }, + { + "epoch": 3.78, + "learning_rate": 4.884763124199744e-05, + "loss": 0.738, + "step": 4724 + }, + { + "epoch": 3.78, + "learning_rate": 4.881562099871959e-05, + "loss": 0.6066, + "step": 4725 + }, + { + "epoch": 3.78, + "learning_rate": 4.878361075544174e-05, + "loss": 0.74, + "step": 4726 + }, + { + "epoch": 3.78, + "learning_rate": 4.8751600512163895e-05, + "loss": 0.7714, + "step": 4727 + }, + { + "epoch": 3.78, + "learning_rate": 4.871959026888605e-05, + "loss": 0.6609, + "step": 4728 + }, + { + "epoch": 3.78, + "learning_rate": 4.86875800256082e-05, + "loss": 0.7967, + "step": 4729 + }, + { + "epoch": 3.78, + "learning_rate": 4.865556978233035e-05, + "loss": 0.6913, + "step": 4730 + }, + { + "epoch": 3.78, + "learning_rate": 4.86235595390525e-05, + "loss": 0.6086, + "step": 4731 + }, + { + "epoch": 3.79, + "learning_rate": 4.8591549295774653e-05, + "loss": 0.9007, + "step": 4732 + }, + { + "epoch": 3.79, + "learning_rate": 4.85595390524968e-05, + "loss": 0.81, + "step": 4733 + }, + { + "epoch": 3.79, + "learning_rate": 4.852752880921895e-05, + "loss": 0.8083, + "step": 4734 + }, + { + "epoch": 3.79, + "learning_rate": 4.8495518565941106e-05, + "loss": 0.6475, + "step": 4735 + }, + { + "epoch": 3.79, + "learning_rate": 4.846350832266325e-05, + "loss": 0.6277, + "step": 4736 + }, + { + "epoch": 3.79, + "learning_rate": 4.8431498079385405e-05, + "loss": 0.7513, + "step": 4737 + }, + { + "epoch": 3.79, + "learning_rate": 4.839948783610755e-05, + "loss": 0.7177, + "step": 4738 + }, + { + "epoch": 3.79, + "learning_rate": 4.8367477592829704e-05, + "loss": 0.7063, + "step": 4739 + }, + { + "epoch": 3.79, + "learning_rate": 4.8335467349551864e-05, + "loss": 0.7433, + "step": 4740 + }, + { + "epoch": 3.79, + "learning_rate": 4.830345710627401e-05, + "loss": 0.592, + "step": 4741 + }, + { + "epoch": 3.79, + "learning_rate": 4.827144686299616e-05, + "loss": 0.7767, + "step": 4742 + }, + { + "epoch": 3.79, + "learning_rate": 4.8239436619718316e-05, + "loss": 0.7925, + "step": 4743 + }, + { + "epoch": 3.8, + "learning_rate": 4.820742637644046e-05, + "loss": 0.7216, + "step": 4744 + }, + { + "epoch": 3.8, + "learning_rate": 4.8175416133162615e-05, + "loss": 0.7004, + "step": 4745 + }, + { + "epoch": 3.8, + "learning_rate": 4.814340588988476e-05, + "loss": 0.7201, + "step": 4746 + }, + { + "epoch": 3.8, + "learning_rate": 4.8111395646606915e-05, + "loss": 0.6837, + "step": 4747 + }, + { + "epoch": 3.8, + "learning_rate": 4.807938540332907e-05, + "loss": 0.7215, + "step": 4748 + }, + { + "epoch": 3.8, + "learning_rate": 4.8047375160051214e-05, + "loss": 0.6084, + "step": 4749 + }, + { + "epoch": 3.8, + "learning_rate": 4.801536491677337e-05, + "loss": 0.6654, + "step": 4750 + }, + { + "epoch": 3.8, + "learning_rate": 4.798335467349552e-05, + "loss": 0.6987, + "step": 4751 + }, + { + "epoch": 3.8, + "learning_rate": 4.795134443021767e-05, + "loss": 0.6457, + "step": 4752 + }, + { + "epoch": 3.8, + "learning_rate": 4.7919334186939826e-05, + "loss": 0.7662, + "step": 4753 + }, + { + "epoch": 3.8, + "learning_rate": 4.788732394366197e-05, + "loss": 0.6535, + "step": 4754 + }, + { + "epoch": 3.8, + "learning_rate": 4.7855313700384125e-05, + "loss": 0.633, + "step": 4755 + }, + { + "epoch": 3.8, + "learning_rate": 4.782330345710628e-05, + "loss": 0.706, + "step": 4756 + }, + { + "epoch": 3.81, + "learning_rate": 4.7791293213828424e-05, + "loss": 0.6286, + "step": 4757 + }, + { + "epoch": 3.81, + "learning_rate": 4.775928297055058e-05, + "loss": 0.7717, + "step": 4758 + }, + { + "epoch": 3.81, + "learning_rate": 4.772727272727273e-05, + "loss": 0.6654, + "step": 4759 + }, + { + "epoch": 3.81, + "learning_rate": 4.769526248399488e-05, + "loss": 0.6696, + "step": 4760 + }, + { + "epoch": 3.81, + "learning_rate": 4.766325224071703e-05, + "loss": 0.6599, + "step": 4761 + }, + { + "epoch": 3.81, + "learning_rate": 4.763124199743918e-05, + "loss": 0.7847, + "step": 4762 + }, + { + "epoch": 3.81, + "learning_rate": 4.7599231754161336e-05, + "loss": 0.6446, + "step": 4763 + }, + { + "epoch": 3.81, + "learning_rate": 4.756722151088349e-05, + "loss": 0.6399, + "step": 4764 + }, + { + "epoch": 3.81, + "learning_rate": 4.7535211267605635e-05, + "loss": 0.7396, + "step": 4765 + }, + { + "epoch": 3.81, + "learning_rate": 4.750320102432779e-05, + "loss": 0.8012, + "step": 4766 + }, + { + "epoch": 3.81, + "learning_rate": 4.747119078104994e-05, + "loss": 0.6369, + "step": 4767 + }, + { + "epoch": 3.81, + "learning_rate": 4.743918053777209e-05, + "loss": 0.7713, + "step": 4768 + }, + { + "epoch": 3.82, + "learning_rate": 4.740717029449424e-05, + "loss": 0.8271, + "step": 4769 + }, + { + "epoch": 3.82, + "learning_rate": 4.737516005121639e-05, + "loss": 0.7934, + "step": 4770 + }, + { + "epoch": 3.82, + "learning_rate": 4.734314980793854e-05, + "loss": 0.7019, + "step": 4771 + }, + { + "epoch": 3.82, + "learning_rate": 4.731113956466069e-05, + "loss": 0.6811, + "step": 4772 + }, + { + "epoch": 3.82, + "learning_rate": 4.7279129321382845e-05, + "loss": 0.7796, + "step": 4773 + }, + { + "epoch": 3.82, + "learning_rate": 4.7247119078105e-05, + "loss": 0.7124, + "step": 4774 + }, + { + "epoch": 3.82, + "learning_rate": 4.721510883482715e-05, + "loss": 0.7255, + "step": 4775 + }, + { + "epoch": 3.82, + "learning_rate": 4.71830985915493e-05, + "loss": 0.659, + "step": 4776 + }, + { + "epoch": 3.82, + "learning_rate": 4.715108834827145e-05, + "loss": 0.6891, + "step": 4777 + }, + { + "epoch": 3.82, + "learning_rate": 4.7119078104993604e-05, + "loss": 0.5844, + "step": 4778 + }, + { + "epoch": 3.82, + "learning_rate": 4.708706786171575e-05, + "loss": 0.6601, + "step": 4779 + }, + { + "epoch": 3.82, + "learning_rate": 4.70550576184379e-05, + "loss": 0.67, + "step": 4780 + }, + { + "epoch": 3.82, + "learning_rate": 4.702304737516005e-05, + "loss": 0.6631, + "step": 4781 + }, + { + "epoch": 3.83, + "learning_rate": 4.69910371318822e-05, + "loss": 0.6761, + "step": 4782 + }, + { + "epoch": 3.83, + "learning_rate": 4.6959026888604355e-05, + "loss": 0.7824, + "step": 4783 + }, + { + "epoch": 3.83, + "learning_rate": 4.69270166453265e-05, + "loss": 0.7102, + "step": 4784 + }, + { + "epoch": 3.83, + "learning_rate": 4.689500640204866e-05, + "loss": 0.6236, + "step": 4785 + }, + { + "epoch": 3.83, + "learning_rate": 4.6862996158770814e-05, + "loss": 0.7507, + "step": 4786 + }, + { + "epoch": 3.83, + "learning_rate": 4.683098591549296e-05, + "loss": 0.6627, + "step": 4787 + }, + { + "epoch": 3.83, + "learning_rate": 4.6798975672215114e-05, + "loss": 0.6767, + "step": 4788 + }, + { + "epoch": 3.83, + "learning_rate": 4.676696542893726e-05, + "loss": 0.6936, + "step": 4789 + }, + { + "epoch": 3.83, + "learning_rate": 4.673495518565941e-05, + "loss": 0.7076, + "step": 4790 + }, + { + "epoch": 3.83, + "learning_rate": 4.6702944942381566e-05, + "loss": 0.7149, + "step": 4791 + }, + { + "epoch": 3.83, + "learning_rate": 4.667093469910371e-05, + "loss": 0.6917, + "step": 4792 + }, + { + "epoch": 3.83, + "learning_rate": 4.6638924455825865e-05, + "loss": 0.6995, + "step": 4793 + }, + { + "epoch": 3.84, + "learning_rate": 4.660691421254802e-05, + "loss": 0.7391, + "step": 4794 + }, + { + "epoch": 3.84, + "learning_rate": 4.6574903969270164e-05, + "loss": 0.6332, + "step": 4795 + }, + { + "epoch": 3.84, + "learning_rate": 4.654289372599232e-05, + "loss": 0.7822, + "step": 4796 + }, + { + "epoch": 3.84, + "learning_rate": 4.651088348271447e-05, + "loss": 0.6418, + "step": 4797 + }, + { + "epoch": 3.84, + "learning_rate": 4.647887323943662e-05, + "loss": 0.7166, + "step": 4798 + }, + { + "epoch": 3.84, + "learning_rate": 4.6446862996158776e-05, + "loss": 0.7868, + "step": 4799 + }, + { + "epoch": 3.84, + "learning_rate": 4.641485275288092e-05, + "loss": 0.6917, + "step": 4800 + }, + { + "epoch": 3.84, + "learning_rate": 4.6382842509603076e-05, + "loss": 0.8245, + "step": 4801 + }, + { + "epoch": 3.84, + "learning_rate": 4.635083226632523e-05, + "loss": 0.7129, + "step": 4802 + }, + { + "epoch": 3.84, + "learning_rate": 4.6318822023047375e-05, + "loss": 0.7607, + "step": 4803 + }, + { + "epoch": 3.84, + "learning_rate": 4.628681177976953e-05, + "loss": 0.6587, + "step": 4804 + }, + { + "epoch": 3.84, + "learning_rate": 4.625480153649168e-05, + "loss": 0.6796, + "step": 4805 + }, + { + "epoch": 3.84, + "learning_rate": 4.622279129321383e-05, + "loss": 0.7939, + "step": 4806 + }, + { + "epoch": 3.85, + "learning_rate": 4.619078104993598e-05, + "loss": 0.7774, + "step": 4807 + }, + { + "epoch": 3.85, + "learning_rate": 4.615877080665813e-05, + "loss": 0.6594, + "step": 4808 + }, + { + "epoch": 3.85, + "learning_rate": 4.6126760563380286e-05, + "loss": 0.6571, + "step": 4809 + }, + { + "epoch": 3.85, + "learning_rate": 4.609475032010244e-05, + "loss": 0.6773, + "step": 4810 + }, + { + "epoch": 3.85, + "learning_rate": 4.6062740076824585e-05, + "loss": 0.6974, + "step": 4811 + }, + { + "epoch": 3.85, + "learning_rate": 4.603072983354674e-05, + "loss": 0.6961, + "step": 4812 + }, + { + "epoch": 3.85, + "learning_rate": 4.599871959026889e-05, + "loss": 0.7599, + "step": 4813 + }, + { + "epoch": 3.85, + "learning_rate": 4.596670934699104e-05, + "loss": 0.7203, + "step": 4814 + }, + { + "epoch": 3.85, + "learning_rate": 4.593469910371319e-05, + "loss": 0.603, + "step": 4815 + }, + { + "epoch": 3.85, + "learning_rate": 4.590268886043534e-05, + "loss": 0.7209, + "step": 4816 + }, + { + "epoch": 3.85, + "learning_rate": 4.587067861715749e-05, + "loss": 0.7206, + "step": 4817 + }, + { + "epoch": 3.85, + "learning_rate": 4.583866837387964e-05, + "loss": 0.6556, + "step": 4818 + }, + { + "epoch": 3.86, + "learning_rate": 4.5806658130601796e-05, + "loss": 0.6754, + "step": 4819 + }, + { + "epoch": 3.86, + "learning_rate": 4.577464788732395e-05, + "loss": 0.6918, + "step": 4820 + }, + { + "epoch": 3.86, + "learning_rate": 4.57426376440461e-05, + "loss": 0.7462, + "step": 4821 + }, + { + "epoch": 3.86, + "learning_rate": 4.571062740076825e-05, + "loss": 0.7148, + "step": 4822 + }, + { + "epoch": 3.86, + "learning_rate": 4.56786171574904e-05, + "loss": 0.6594, + "step": 4823 + }, + { + "epoch": 3.86, + "learning_rate": 4.5646606914212554e-05, + "loss": 0.7812, + "step": 4824 + }, + { + "epoch": 3.86, + "learning_rate": 4.56145966709347e-05, + "loss": 0.7585, + "step": 4825 + }, + { + "epoch": 3.86, + "learning_rate": 4.558258642765685e-05, + "loss": 0.7097, + "step": 4826 + }, + { + "epoch": 3.86, + "learning_rate": 4.5550576184379e-05, + "loss": 0.7252, + "step": 4827 + }, + { + "epoch": 3.86, + "learning_rate": 4.551856594110115e-05, + "loss": 0.7843, + "step": 4828 + }, + { + "epoch": 3.86, + "learning_rate": 4.5486555697823306e-05, + "loss": 0.7404, + "step": 4829 + }, + { + "epoch": 3.86, + "learning_rate": 4.545454545454546e-05, + "loss": 0.7354, + "step": 4830 + }, + { + "epoch": 3.86, + "learning_rate": 4.542253521126761e-05, + "loss": 0.771, + "step": 4831 + }, + { + "epoch": 3.87, + "learning_rate": 4.5390524967989765e-05, + "loss": 0.8947, + "step": 4832 + }, + { + "epoch": 3.87, + "learning_rate": 4.535851472471191e-05, + "loss": 0.7144, + "step": 4833 + }, + { + "epoch": 3.87, + "learning_rate": 4.5326504481434064e-05, + "loss": 0.6156, + "step": 4834 + }, + { + "epoch": 3.87, + "learning_rate": 4.529449423815621e-05, + "loss": 0.7101, + "step": 4835 + }, + { + "epoch": 3.87, + "learning_rate": 4.526248399487836e-05, + "loss": 0.6609, + "step": 4836 + }, + { + "epoch": 3.87, + "learning_rate": 4.5230473751600516e-05, + "loss": 0.655, + "step": 4837 + }, + { + "epoch": 3.87, + "learning_rate": 4.519846350832266e-05, + "loss": 0.7163, + "step": 4838 + }, + { + "epoch": 3.87, + "learning_rate": 4.5166453265044815e-05, + "loss": 0.8927, + "step": 4839 + }, + { + "epoch": 3.87, + "learning_rate": 4.513444302176697e-05, + "loss": 0.7132, + "step": 4840 + }, + { + "epoch": 3.87, + "learning_rate": 4.5102432778489115e-05, + "loss": 0.7936, + "step": 4841 + }, + { + "epoch": 3.87, + "learning_rate": 4.507042253521127e-05, + "loss": 0.7244, + "step": 4842 + }, + { + "epoch": 3.87, + "learning_rate": 4.503841229193342e-05, + "loss": 0.684, + "step": 4843 + }, + { + "epoch": 3.88, + "learning_rate": 4.5006402048655574e-05, + "loss": 0.6798, + "step": 4844 + }, + { + "epoch": 3.88, + "learning_rate": 4.4974391805377727e-05, + "loss": 0.6836, + "step": 4845 + }, + { + "epoch": 3.88, + "learning_rate": 4.494238156209987e-05, + "loss": 0.6527, + "step": 4846 + }, + { + "epoch": 3.88, + "learning_rate": 4.4910371318822026e-05, + "loss": 0.7609, + "step": 4847 + }, + { + "epoch": 3.88, + "learning_rate": 4.487836107554418e-05, + "loss": 0.7714, + "step": 4848 + }, + { + "epoch": 3.88, + "learning_rate": 4.4846350832266325e-05, + "loss": 0.7606, + "step": 4849 + }, + { + "epoch": 3.88, + "learning_rate": 4.481434058898848e-05, + "loss": 0.8353, + "step": 4850 + }, + { + "epoch": 3.88, + "learning_rate": 4.4782330345710624e-05, + "loss": 0.7442, + "step": 4851 + }, + { + "epoch": 3.88, + "learning_rate": 4.475032010243278e-05, + "loss": 0.7138, + "step": 4852 + }, + { + "epoch": 3.88, + "learning_rate": 4.471830985915493e-05, + "loss": 0.8241, + "step": 4853 + }, + { + "epoch": 3.88, + "learning_rate": 4.468629961587708e-05, + "loss": 0.7959, + "step": 4854 + }, + { + "epoch": 3.88, + "learning_rate": 4.4654289372599236e-05, + "loss": 0.6873, + "step": 4855 + }, + { + "epoch": 3.88, + "learning_rate": 4.462227912932139e-05, + "loss": 0.646, + "step": 4856 + }, + { + "epoch": 3.89, + "learning_rate": 4.4590268886043536e-05, + "loss": 0.5776, + "step": 4857 + }, + { + "epoch": 3.89, + "learning_rate": 4.455825864276569e-05, + "loss": 0.6885, + "step": 4858 + }, + { + "epoch": 3.89, + "learning_rate": 4.452624839948784e-05, + "loss": 0.7945, + "step": 4859 + }, + { + "epoch": 3.89, + "learning_rate": 4.449423815620999e-05, + "loss": 0.6438, + "step": 4860 + }, + { + "epoch": 3.89, + "learning_rate": 4.446222791293214e-05, + "loss": 0.694, + "step": 4861 + }, + { + "epoch": 3.89, + "learning_rate": 4.443021766965429e-05, + "loss": 0.649, + "step": 4862 + }, + { + "epoch": 3.89, + "learning_rate": 4.439820742637644e-05, + "loss": 0.8262, + "step": 4863 + }, + { + "epoch": 3.89, + "learning_rate": 4.436619718309859e-05, + "loss": 0.758, + "step": 4864 + }, + { + "epoch": 3.89, + "learning_rate": 4.4334186939820746e-05, + "loss": 0.7239, + "step": 4865 + }, + { + "epoch": 3.89, + "learning_rate": 4.43021766965429e-05, + "loss": 0.707, + "step": 4866 + }, + { + "epoch": 3.89, + "learning_rate": 4.427016645326505e-05, + "loss": 0.6235, + "step": 4867 + }, + { + "epoch": 3.89, + "learning_rate": 4.42381562099872e-05, + "loss": 0.6687, + "step": 4868 + }, + { + "epoch": 3.9, + "learning_rate": 4.420614596670935e-05, + "loss": 0.6954, + "step": 4869 + }, + { + "epoch": 3.9, + "learning_rate": 4.41741357234315e-05, + "loss": 0.7443, + "step": 4870 + }, + { + "epoch": 3.9, + "learning_rate": 4.414212548015365e-05, + "loss": 0.7698, + "step": 4871 + }, + { + "epoch": 3.9, + "learning_rate": 4.4110115236875804e-05, + "loss": 0.721, + "step": 4872 + }, + { + "epoch": 3.9, + "learning_rate": 4.407810499359795e-05, + "loss": 0.7284, + "step": 4873 + }, + { + "epoch": 3.9, + "learning_rate": 4.40460947503201e-05, + "loss": 0.5948, + "step": 4874 + }, + { + "epoch": 3.9, + "learning_rate": 4.4014084507042256e-05, + "loss": 0.6881, + "step": 4875 + }, + { + "epoch": 3.9, + "learning_rate": 4.398207426376441e-05, + "loss": 0.6502, + "step": 4876 + }, + { + "epoch": 3.9, + "learning_rate": 4.395006402048656e-05, + "loss": 0.6559, + "step": 4877 + }, + { + "epoch": 3.9, + "learning_rate": 4.391805377720871e-05, + "loss": 0.7202, + "step": 4878 + }, + { + "epoch": 3.9, + "learning_rate": 4.388604353393086e-05, + "loss": 0.7502, + "step": 4879 + }, + { + "epoch": 3.9, + "learning_rate": 4.3854033290653014e-05, + "loss": 0.5989, + "step": 4880 + }, + { + "epoch": 3.9, + "learning_rate": 4.382202304737516e-05, + "loss": 0.6977, + "step": 4881 + }, + { + "epoch": 3.91, + "learning_rate": 4.379001280409731e-05, + "loss": 0.6239, + "step": 4882 + }, + { + "epoch": 3.91, + "learning_rate": 4.3758002560819466e-05, + "loss": 0.6674, + "step": 4883 + }, + { + "epoch": 3.91, + "learning_rate": 4.372599231754161e-05, + "loss": 0.7696, + "step": 4884 + }, + { + "epoch": 3.91, + "learning_rate": 4.3693982074263766e-05, + "loss": 0.7317, + "step": 4885 + }, + { + "epoch": 3.91, + "learning_rate": 4.366197183098591e-05, + "loss": 0.7287, + "step": 4886 + }, + { + "epoch": 3.91, + "learning_rate": 4.3629961587708065e-05, + "loss": 0.6441, + "step": 4887 + }, + { + "epoch": 3.91, + "learning_rate": 4.359795134443022e-05, + "loss": 0.7778, + "step": 4888 + }, + { + "epoch": 3.91, + "learning_rate": 4.356594110115237e-05, + "loss": 0.8689, + "step": 4889 + }, + { + "epoch": 3.91, + "learning_rate": 4.3533930857874524e-05, + "loss": 0.7159, + "step": 4890 + }, + { + "epoch": 3.91, + "learning_rate": 4.350192061459668e-05, + "loss": 0.6478, + "step": 4891 + }, + { + "epoch": 3.91, + "learning_rate": 4.346991037131882e-05, + "loss": 0.7319, + "step": 4892 + }, + { + "epoch": 3.91, + "learning_rate": 4.3437900128040976e-05, + "loss": 0.5295, + "step": 4893 + }, + { + "epoch": 3.92, + "learning_rate": 4.340588988476313e-05, + "loss": 0.6784, + "step": 4894 + }, + { + "epoch": 3.92, + "learning_rate": 4.3373879641485275e-05, + "loss": 0.6931, + "step": 4895 + }, + { + "epoch": 3.92, + "learning_rate": 4.334186939820743e-05, + "loss": 0.7339, + "step": 4896 + }, + { + "epoch": 3.92, + "learning_rate": 4.3309859154929575e-05, + "loss": 0.7547, + "step": 4897 + }, + { + "epoch": 3.92, + "learning_rate": 4.327784891165173e-05, + "loss": 0.8045, + "step": 4898 + }, + { + "epoch": 3.92, + "learning_rate": 4.324583866837388e-05, + "loss": 0.7108, + "step": 4899 + }, + { + "epoch": 3.92, + "learning_rate": 4.3213828425096034e-05, + "loss": 0.5908, + "step": 4900 + }, + { + "epoch": 3.92, + "learning_rate": 4.318181818181819e-05, + "loss": 0.6588, + "step": 4901 + }, + { + "epoch": 3.92, + "learning_rate": 4.314980793854034e-05, + "loss": 0.7149, + "step": 4902 + }, + { + "epoch": 3.92, + "learning_rate": 4.3117797695262486e-05, + "loss": 0.8251, + "step": 4903 + }, + { + "epoch": 3.92, + "learning_rate": 4.308578745198464e-05, + "loss": 0.7329, + "step": 4904 + }, + { + "epoch": 3.92, + "learning_rate": 4.3053777208706785e-05, + "loss": 0.7612, + "step": 4905 + }, + { + "epoch": 3.92, + "learning_rate": 4.302176696542894e-05, + "loss": 0.7761, + "step": 4906 + }, + { + "epoch": 3.93, + "learning_rate": 4.298975672215109e-05, + "loss": 0.7173, + "step": 4907 + }, + { + "epoch": 3.93, + "learning_rate": 4.295774647887324e-05, + "loss": 0.7757, + "step": 4908 + }, + { + "epoch": 3.93, + "learning_rate": 4.292573623559539e-05, + "loss": 0.673, + "step": 4909 + }, + { + "epoch": 3.93, + "learning_rate": 4.2893725992317543e-05, + "loss": 0.6965, + "step": 4910 + }, + { + "epoch": 3.93, + "learning_rate": 4.2861715749039696e-05, + "loss": 0.5475, + "step": 4911 + }, + { + "epoch": 3.93, + "learning_rate": 4.282970550576185e-05, + "loss": 0.6816, + "step": 4912 + }, + { + "epoch": 3.93, + "learning_rate": 4.2797695262483996e-05, + "loss": 0.6167, + "step": 4913 + }, + { + "epoch": 3.93, + "learning_rate": 4.276568501920615e-05, + "loss": 0.722, + "step": 4914 + }, + { + "epoch": 3.93, + "learning_rate": 4.27336747759283e-05, + "loss": 0.6437, + "step": 4915 + }, + { + "epoch": 3.93, + "learning_rate": 4.270166453265045e-05, + "loss": 0.8197, + "step": 4916 + }, + { + "epoch": 3.93, + "learning_rate": 4.26696542893726e-05, + "loss": 0.7746, + "step": 4917 + }, + { + "epoch": 3.93, + "learning_rate": 4.2637644046094754e-05, + "loss": 0.8358, + "step": 4918 + }, + { + "epoch": 3.94, + "learning_rate": 4.26056338028169e-05, + "loss": 0.73, + "step": 4919 + }, + { + "epoch": 3.94, + "learning_rate": 4.257362355953905e-05, + "loss": 0.6789, + "step": 4920 + }, + { + "epoch": 3.94, + "learning_rate": 4.2541613316261206e-05, + "loss": 0.7539, + "step": 4921 + }, + { + "epoch": 3.94, + "learning_rate": 4.250960307298336e-05, + "loss": 0.7211, + "step": 4922 + }, + { + "epoch": 3.94, + "learning_rate": 4.247759282970551e-05, + "loss": 0.5559, + "step": 4923 + }, + { + "epoch": 3.94, + "learning_rate": 4.244558258642766e-05, + "loss": 0.7992, + "step": 4924 + }, + { + "epoch": 3.94, + "learning_rate": 4.241357234314981e-05, + "loss": 0.7098, + "step": 4925 + }, + { + "epoch": 3.94, + "learning_rate": 4.2381562099871964e-05, + "loss": 0.7098, + "step": 4926 + }, + { + "epoch": 3.94, + "learning_rate": 4.234955185659411e-05, + "loss": 0.6701, + "step": 4927 + }, + { + "epoch": 3.94, + "learning_rate": 4.2317541613316264e-05, + "loss": 0.6412, + "step": 4928 + }, + { + "epoch": 3.94, + "learning_rate": 4.228553137003842e-05, + "loss": 0.5857, + "step": 4929 + }, + { + "epoch": 3.94, + "learning_rate": 4.225352112676056e-05, + "loss": 0.6688, + "step": 4930 + }, + { + "epoch": 3.94, + "learning_rate": 4.2221510883482716e-05, + "loss": 0.7295, + "step": 4931 + }, + { + "epoch": 3.95, + "learning_rate": 4.218950064020486e-05, + "loss": 0.8218, + "step": 4932 + }, + { + "epoch": 3.95, + "learning_rate": 4.2157490396927015e-05, + "loss": 0.77, + "step": 4933 + }, + { + "epoch": 3.95, + "learning_rate": 4.2125480153649175e-05, + "loss": 0.7448, + "step": 4934 + }, + { + "epoch": 3.95, + "learning_rate": 4.209346991037132e-05, + "loss": 0.6085, + "step": 4935 + }, + { + "epoch": 3.95, + "learning_rate": 4.2061459667093474e-05, + "loss": 0.6356, + "step": 4936 + }, + { + "epoch": 3.95, + "learning_rate": 4.202944942381563e-05, + "loss": 0.6851, + "step": 4937 + }, + { + "epoch": 3.95, + "learning_rate": 4.1997439180537773e-05, + "loss": 0.7273, + "step": 4938 + }, + { + "epoch": 3.95, + "learning_rate": 4.1965428937259926e-05, + "loss": 0.8714, + "step": 4939 + }, + { + "epoch": 3.95, + "learning_rate": 4.193341869398207e-05, + "loss": 0.7041, + "step": 4940 + }, + { + "epoch": 3.95, + "learning_rate": 4.1901408450704226e-05, + "loss": 0.764, + "step": 4941 + }, + { + "epoch": 3.95, + "learning_rate": 4.186939820742638e-05, + "loss": 0.6206, + "step": 4942 + }, + { + "epoch": 3.95, + "learning_rate": 4.1837387964148525e-05, + "loss": 0.6843, + "step": 4943 + }, + { + "epoch": 3.96, + "learning_rate": 4.180537772087068e-05, + "loss": 0.5801, + "step": 4944 + }, + { + "epoch": 3.96, + "learning_rate": 4.177336747759283e-05, + "loss": 0.6745, + "step": 4945 + }, + { + "epoch": 3.96, + "learning_rate": 4.1741357234314984e-05, + "loss": 0.63, + "step": 4946 + }, + { + "epoch": 3.96, + "learning_rate": 4.170934699103714e-05, + "loss": 0.8455, + "step": 4947 + }, + { + "epoch": 3.96, + "learning_rate": 4.167733674775928e-05, + "loss": 0.7732, + "step": 4948 + }, + { + "epoch": 3.96, + "learning_rate": 4.1645326504481436e-05, + "loss": 0.7804, + "step": 4949 + }, + { + "epoch": 3.96, + "learning_rate": 4.161331626120359e-05, + "loss": 0.6926, + "step": 4950 + }, + { + "epoch": 3.96, + "learning_rate": 4.1581306017925735e-05, + "loss": 0.6171, + "step": 4951 + }, + { + "epoch": 3.96, + "learning_rate": 4.154929577464789e-05, + "loss": 0.6728, + "step": 4952 + }, + { + "epoch": 3.96, + "learning_rate": 4.151728553137004e-05, + "loss": 0.675, + "step": 4953 + }, + { + "epoch": 3.96, + "learning_rate": 4.148527528809219e-05, + "loss": 0.7139, + "step": 4954 + }, + { + "epoch": 3.96, + "learning_rate": 4.145326504481434e-05, + "loss": 0.7632, + "step": 4955 + }, + { + "epoch": 3.96, + "learning_rate": 4.1421254801536494e-05, + "loss": 0.6265, + "step": 4956 + }, + { + "epoch": 3.97, + "learning_rate": 4.138924455825865e-05, + "loss": 0.7263, + "step": 4957 + }, + { + "epoch": 3.97, + "learning_rate": 4.13572343149808e-05, + "loss": 0.6807, + "step": 4958 + }, + { + "epoch": 3.97, + "learning_rate": 4.1325224071702946e-05, + "loss": 0.6963, + "step": 4959 + }, + { + "epoch": 3.97, + "learning_rate": 4.12932138284251e-05, + "loss": 0.663, + "step": 4960 + }, + { + "epoch": 3.97, + "learning_rate": 4.126120358514725e-05, + "loss": 0.6964, + "step": 4961 + }, + { + "epoch": 3.97, + "learning_rate": 4.12291933418694e-05, + "loss": 0.6245, + "step": 4962 + }, + { + "epoch": 3.97, + "learning_rate": 4.119718309859155e-05, + "loss": 0.6982, + "step": 4963 + }, + { + "epoch": 3.97, + "learning_rate": 4.1165172855313704e-05, + "loss": 0.6507, + "step": 4964 + }, + { + "epoch": 3.97, + "learning_rate": 4.113316261203585e-05, + "loss": 0.596, + "step": 4965 + }, + { + "epoch": 3.97, + "learning_rate": 4.1101152368758003e-05, + "loss": 0.6324, + "step": 4966 + }, + { + "epoch": 3.97, + "learning_rate": 4.1069142125480156e-05, + "loss": 0.7334, + "step": 4967 + }, + { + "epoch": 3.97, + "learning_rate": 4.103713188220231e-05, + "loss": 0.679, + "step": 4968 + }, + { + "epoch": 3.98, + "learning_rate": 4.100512163892446e-05, + "loss": 0.6683, + "step": 4969 + }, + { + "epoch": 3.98, + "learning_rate": 4.097311139564661e-05, + "loss": 0.593, + "step": 4970 + }, + { + "epoch": 3.98, + "learning_rate": 4.094110115236876e-05, + "loss": 0.7005, + "step": 4971 + }, + { + "epoch": 3.98, + "learning_rate": 4.0909090909090915e-05, + "loss": 0.7182, + "step": 4972 + }, + { + "epoch": 3.98, + "learning_rate": 4.087708066581306e-05, + "loss": 0.7241, + "step": 4973 + }, + { + "epoch": 3.98, + "learning_rate": 4.0845070422535214e-05, + "loss": 0.7283, + "step": 4974 + }, + { + "epoch": 3.98, + "learning_rate": 4.081306017925736e-05, + "loss": 0.7298, + "step": 4975 + }, + { + "epoch": 3.98, + "learning_rate": 4.078104993597951e-05, + "loss": 0.75, + "step": 4976 + }, + { + "epoch": 3.98, + "learning_rate": 4.0749039692701666e-05, + "loss": 0.6942, + "step": 4977 + }, + { + "epoch": 3.98, + "learning_rate": 4.071702944942381e-05, + "loss": 0.6125, + "step": 4978 + }, + { + "epoch": 3.98, + "learning_rate": 4.068501920614597e-05, + "loss": 0.6476, + "step": 4979 + }, + { + "epoch": 3.98, + "learning_rate": 4.0653008962868125e-05, + "loss": 0.6045, + "step": 4980 + }, + { + "epoch": 3.98, + "learning_rate": 4.062099871959027e-05, + "loss": 0.6285, + "step": 4981 + }, + { + "epoch": 3.99, + "learning_rate": 4.0588988476312425e-05, + "loss": 0.698, + "step": 4982 + }, + { + "epoch": 3.99, + "learning_rate": 4.055697823303457e-05, + "loss": 0.7272, + "step": 4983 + }, + { + "epoch": 3.99, + "learning_rate": 4.0524967989756724e-05, + "loss": 0.6678, + "step": 4984 + }, + { + "epoch": 3.99, + "learning_rate": 4.049295774647888e-05, + "loss": 0.7069, + "step": 4985 + }, + { + "epoch": 3.99, + "learning_rate": 4.046094750320102e-05, + "loss": 0.6096, + "step": 4986 + }, + { + "epoch": 3.99, + "learning_rate": 4.0428937259923176e-05, + "loss": 0.7045, + "step": 4987 + }, + { + "epoch": 3.99, + "learning_rate": 4.039692701664533e-05, + "loss": 0.7585, + "step": 4988 + }, + { + "epoch": 3.99, + "learning_rate": 4.0364916773367475e-05, + "loss": 0.6947, + "step": 4989 + }, + { + "epoch": 3.99, + "learning_rate": 4.033290653008963e-05, + "loss": 0.7312, + "step": 4990 + }, + { + "epoch": 3.99, + "learning_rate": 4.030089628681178e-05, + "loss": 0.7574, + "step": 4991 + }, + { + "epoch": 3.99, + "learning_rate": 4.0268886043533934e-05, + "loss": 0.6886, + "step": 4992 + }, + { + "epoch": 3.99, + "learning_rate": 4.023687580025609e-05, + "loss": 0.7358, + "step": 4993 + }, + { + "epoch": 4.0, + "learning_rate": 4.0204865556978234e-05, + "loss": 0.7224, + "step": 4994 + }, + { + "epoch": 4.0, + "learning_rate": 4.0172855313700387e-05, + "loss": 0.7324, + "step": 4995 + }, + { + "epoch": 4.0, + "learning_rate": 4.014084507042254e-05, + "loss": 0.7573, + "step": 4996 + }, + { + "epoch": 4.0, + "learning_rate": 4.0108834827144686e-05, + "loss": 0.8397, + "step": 4997 + }, + { + "epoch": 4.0, + "learning_rate": 4.007682458386684e-05, + "loss": 0.6686, + "step": 4998 + }, + { + "epoch": 4.0, + "learning_rate": 4.004481434058899e-05, + "loss": 0.6521, + "step": 4999 + }, + { + "epoch": 4.0, + "learning_rate": 4.001280409731114e-05, + "loss": 0.6891, + "step": 5000 + }, + { + "epoch": 4.0, + "learning_rate": 3.998079385403329e-05, + "loss": 0.6021, + "step": 5001 + }, + { + "epoch": 4.0, + "learning_rate": 3.9948783610755444e-05, + "loss": 0.6857, + "step": 5002 + }, + { + "epoch": 4.0, + "learning_rate": 3.99167733674776e-05, + "loss": 0.6741, + "step": 5003 + }, + { + "epoch": 4.0, + "learning_rate": 3.988476312419975e-05, + "loss": 0.6328, + "step": 5004 + }, + { + "epoch": 4.0, + "learning_rate": 3.9852752880921896e-05, + "loss": 0.6008, + "step": 5005 + }, + { + "epoch": 4.0, + "learning_rate": 3.982074263764405e-05, + "loss": 0.6513, + "step": 5006 + }, + { + "epoch": 4.01, + "learning_rate": 3.97887323943662e-05, + "loss": 0.6408, + "step": 5007 + }, + { + "epoch": 4.01, + "learning_rate": 3.975672215108835e-05, + "loss": 0.6809, + "step": 5008 + }, + { + "epoch": 4.01, + "learning_rate": 3.97247119078105e-05, + "loss": 0.5319, + "step": 5009 + }, + { + "epoch": 4.01, + "learning_rate": 3.969270166453265e-05, + "loss": 0.7417, + "step": 5010 + }, + { + "epoch": 4.01, + "learning_rate": 3.96606914212548e-05, + "loss": 0.581, + "step": 5011 + }, + { + "epoch": 4.01, + "learning_rate": 3.9628681177976954e-05, + "loss": 0.7453, + "step": 5012 + }, + { + "epoch": 4.01, + "learning_rate": 3.959667093469911e-05, + "loss": 0.7403, + "step": 5013 + }, + { + "epoch": 4.01, + "learning_rate": 3.956466069142126e-05, + "loss": 0.6948, + "step": 5014 + }, + { + "epoch": 4.01, + "learning_rate": 3.953265044814341e-05, + "loss": 0.7438, + "step": 5015 + }, + { + "epoch": 4.01, + "learning_rate": 3.950064020486556e-05, + "loss": 0.7043, + "step": 5016 + }, + { + "epoch": 4.01, + "learning_rate": 3.946862996158771e-05, + "loss": 0.5647, + "step": 5017 + }, + { + "epoch": 4.01, + "learning_rate": 3.943661971830986e-05, + "loss": 0.6786, + "step": 5018 + }, + { + "epoch": 4.02, + "learning_rate": 3.940460947503201e-05, + "loss": 0.8185, + "step": 5019 + }, + { + "epoch": 4.02, + "learning_rate": 3.9372599231754164e-05, + "loss": 0.5862, + "step": 5020 + }, + { + "epoch": 4.02, + "learning_rate": 3.934058898847631e-05, + "loss": 0.846, + "step": 5021 + }, + { + "epoch": 4.02, + "learning_rate": 3.9308578745198464e-05, + "loss": 0.7179, + "step": 5022 + }, + { + "epoch": 4.02, + "learning_rate": 3.9276568501920617e-05, + "loss": 0.6444, + "step": 5023 + }, + { + "epoch": 4.02, + "learning_rate": 3.924455825864277e-05, + "loss": 0.6342, + "step": 5024 + }, + { + "epoch": 4.02, + "learning_rate": 3.921254801536492e-05, + "loss": 0.5903, + "step": 5025 + }, + { + "epoch": 4.02, + "learning_rate": 3.918053777208707e-05, + "loss": 0.678, + "step": 5026 + }, + { + "epoch": 4.02, + "learning_rate": 3.914852752880922e-05, + "loss": 0.7005, + "step": 5027 + }, + { + "epoch": 4.02, + "learning_rate": 3.9116517285531375e-05, + "loss": 0.7838, + "step": 5028 + }, + { + "epoch": 4.02, + "learning_rate": 3.908450704225352e-05, + "loss": 0.7213, + "step": 5029 + }, + { + "epoch": 4.02, + "learning_rate": 3.9052496798975674e-05, + "loss": 0.6833, + "step": 5030 + }, + { + "epoch": 4.02, + "learning_rate": 3.902048655569783e-05, + "loss": 0.5465, + "step": 5031 + }, + { + "epoch": 4.03, + "learning_rate": 3.898847631241997e-05, + "loss": 0.6386, + "step": 5032 + }, + { + "epoch": 4.03, + "learning_rate": 3.8956466069142126e-05, + "loss": 0.6472, + "step": 5033 + }, + { + "epoch": 4.03, + "learning_rate": 3.892445582586428e-05, + "loss": 0.6218, + "step": 5034 + }, + { + "epoch": 4.03, + "learning_rate": 3.8892445582586426e-05, + "loss": 0.507, + "step": 5035 + }, + { + "epoch": 4.03, + "learning_rate": 3.886043533930858e-05, + "loss": 0.6736, + "step": 5036 + }, + { + "epoch": 4.03, + "learning_rate": 3.882842509603073e-05, + "loss": 0.7069, + "step": 5037 + }, + { + "epoch": 4.03, + "learning_rate": 3.8796414852752885e-05, + "loss": 0.6537, + "step": 5038 + }, + { + "epoch": 4.03, + "learning_rate": 3.876440460947504e-05, + "loss": 0.7206, + "step": 5039 + }, + { + "epoch": 4.03, + "learning_rate": 3.8732394366197184e-05, + "loss": 0.6005, + "step": 5040 + }, + { + "epoch": 4.03, + "learning_rate": 3.870038412291934e-05, + "loss": 0.6545, + "step": 5041 + }, + { + "epoch": 4.03, + "learning_rate": 3.866837387964149e-05, + "loss": 0.5872, + "step": 5042 + }, + { + "epoch": 4.03, + "learning_rate": 3.8636363636363636e-05, + "loss": 0.5974, + "step": 5043 + }, + { + "epoch": 4.04, + "learning_rate": 3.860435339308579e-05, + "loss": 0.6325, + "step": 5044 + }, + { + "epoch": 4.04, + "learning_rate": 3.8572343149807935e-05, + "loss": 0.6789, + "step": 5045 + }, + { + "epoch": 4.04, + "learning_rate": 3.854033290653009e-05, + "loss": 0.6971, + "step": 5046 + }, + { + "epoch": 4.04, + "learning_rate": 3.850832266325224e-05, + "loss": 0.6216, + "step": 5047 + }, + { + "epoch": 4.04, + "learning_rate": 3.8476312419974394e-05, + "loss": 0.6724, + "step": 5048 + }, + { + "epoch": 4.04, + "learning_rate": 3.844430217669655e-05, + "loss": 0.6959, + "step": 5049 + }, + { + "epoch": 4.04, + "learning_rate": 3.84122919334187e-05, + "loss": 0.8137, + "step": 5050 + }, + { + "epoch": 4.04, + "learning_rate": 3.8380281690140847e-05, + "loss": 0.7102, + "step": 5051 + }, + { + "epoch": 4.04, + "learning_rate": 3.8348271446863e-05, + "loss": 0.566, + "step": 5052 + }, + { + "epoch": 4.04, + "learning_rate": 3.8316261203585146e-05, + "loss": 0.6603, + "step": 5053 + }, + { + "epoch": 4.04, + "learning_rate": 3.82842509603073e-05, + "loss": 0.6947, + "step": 5054 + }, + { + "epoch": 4.04, + "learning_rate": 3.825224071702945e-05, + "loss": 0.6882, + "step": 5055 + }, + { + "epoch": 4.04, + "learning_rate": 3.82202304737516e-05, + "loss": 0.7073, + "step": 5056 + }, + { + "epoch": 4.05, + "learning_rate": 3.818822023047375e-05, + "loss": 0.6687, + "step": 5057 + }, + { + "epoch": 4.05, + "learning_rate": 3.8156209987195904e-05, + "loss": 0.6466, + "step": 5058 + }, + { + "epoch": 4.05, + "learning_rate": 3.812419974391806e-05, + "loss": 0.7121, + "step": 5059 + }, + { + "epoch": 4.05, + "learning_rate": 3.809218950064021e-05, + "loss": 0.6158, + "step": 5060 + }, + { + "epoch": 4.05, + "learning_rate": 3.8060179257362356e-05, + "loss": 0.7739, + "step": 5061 + }, + { + "epoch": 4.05, + "learning_rate": 3.802816901408451e-05, + "loss": 0.6347, + "step": 5062 + }, + { + "epoch": 4.05, + "learning_rate": 3.799615877080666e-05, + "loss": 0.7955, + "step": 5063 + }, + { + "epoch": 4.05, + "learning_rate": 3.796414852752881e-05, + "loss": 0.6903, + "step": 5064 + }, + { + "epoch": 4.05, + "learning_rate": 3.793213828425096e-05, + "loss": 0.6315, + "step": 5065 + }, + { + "epoch": 4.05, + "learning_rate": 3.7900128040973115e-05, + "loss": 0.7594, + "step": 5066 + }, + { + "epoch": 4.05, + "learning_rate": 3.786811779769526e-05, + "loss": 0.6586, + "step": 5067 + }, + { + "epoch": 4.05, + "learning_rate": 3.7836107554417414e-05, + "loss": 0.6865, + "step": 5068 + }, + { + "epoch": 4.06, + "learning_rate": 3.780409731113957e-05, + "loss": 0.6871, + "step": 5069 + }, + { + "epoch": 4.06, + "learning_rate": 3.777208706786172e-05, + "loss": 0.7114, + "step": 5070 + }, + { + "epoch": 4.06, + "learning_rate": 3.774007682458387e-05, + "loss": 0.7271, + "step": 5071 + }, + { + "epoch": 4.06, + "learning_rate": 3.770806658130602e-05, + "loss": 0.6443, + "step": 5072 + }, + { + "epoch": 4.06, + "learning_rate": 3.767605633802817e-05, + "loss": 0.6142, + "step": 5073 + }, + { + "epoch": 4.06, + "learning_rate": 3.7644046094750325e-05, + "loss": 0.676, + "step": 5074 + }, + { + "epoch": 4.06, + "learning_rate": 3.761203585147247e-05, + "loss": 0.5874, + "step": 5075 + }, + { + "epoch": 4.06, + "learning_rate": 3.7580025608194624e-05, + "loss": 0.6664, + "step": 5076 + }, + { + "epoch": 4.06, + "learning_rate": 3.754801536491678e-05, + "loss": 0.6475, + "step": 5077 + }, + { + "epoch": 4.06, + "learning_rate": 3.7516005121638924e-05, + "loss": 0.7103, + "step": 5078 + }, + { + "epoch": 4.06, + "learning_rate": 3.7483994878361077e-05, + "loss": 0.6781, + "step": 5079 + }, + { + "epoch": 4.06, + "learning_rate": 3.745198463508322e-05, + "loss": 0.7135, + "step": 5080 + }, + { + "epoch": 4.06, + "learning_rate": 3.7419974391805376e-05, + "loss": 0.7155, + "step": 5081 + }, + { + "epoch": 4.07, + "learning_rate": 3.7387964148527536e-05, + "loss": 0.651, + "step": 5082 + }, + { + "epoch": 4.07, + "learning_rate": 3.735595390524968e-05, + "loss": 0.6513, + "step": 5083 + }, + { + "epoch": 4.07, + "learning_rate": 3.7323943661971835e-05, + "loss": 0.615, + "step": 5084 + }, + { + "epoch": 4.07, + "learning_rate": 3.729193341869399e-05, + "loss": 0.6525, + "step": 5085 + }, + { + "epoch": 4.07, + "learning_rate": 3.7259923175416134e-05, + "loss": 0.6284, + "step": 5086 + }, + { + "epoch": 4.07, + "learning_rate": 3.722791293213829e-05, + "loss": 0.7466, + "step": 5087 + }, + { + "epoch": 4.07, + "learning_rate": 3.719590268886043e-05, + "loss": 0.7019, + "step": 5088 + }, + { + "epoch": 4.07, + "learning_rate": 3.7163892445582586e-05, + "loss": 0.7374, + "step": 5089 + }, + { + "epoch": 4.07, + "learning_rate": 3.713188220230474e-05, + "loss": 0.6759, + "step": 5090 + }, + { + "epoch": 4.07, + "learning_rate": 3.7099871959026886e-05, + "loss": 0.6021, + "step": 5091 + }, + { + "epoch": 4.07, + "learning_rate": 3.706786171574904e-05, + "loss": 0.6808, + "step": 5092 + }, + { + "epoch": 4.07, + "learning_rate": 3.703585147247119e-05, + "loss": 0.694, + "step": 5093 + }, + { + "epoch": 4.08, + "learning_rate": 3.7003841229193345e-05, + "loss": 0.6169, + "step": 5094 + }, + { + "epoch": 4.08, + "learning_rate": 3.69718309859155e-05, + "loss": 0.7538, + "step": 5095 + }, + { + "epoch": 4.08, + "learning_rate": 3.693982074263765e-05, + "loss": 0.6137, + "step": 5096 + }, + { + "epoch": 4.08, + "learning_rate": 3.69078104993598e-05, + "loss": 0.769, + "step": 5097 + }, + { + "epoch": 4.08, + "learning_rate": 3.687580025608195e-05, + "loss": 0.791, + "step": 5098 + }, + { + "epoch": 4.08, + "learning_rate": 3.6843790012804096e-05, + "loss": 0.6481, + "step": 5099 + }, + { + "epoch": 4.08, + "learning_rate": 3.681177976952625e-05, + "loss": 0.6299, + "step": 5100 + }, + { + "epoch": 4.08, + "learning_rate": 3.67797695262484e-05, + "loss": 0.6473, + "step": 5101 + }, + { + "epoch": 4.08, + "learning_rate": 3.674775928297055e-05, + "loss": 0.6989, + "step": 5102 + }, + { + "epoch": 4.08, + "learning_rate": 3.67157490396927e-05, + "loss": 0.6066, + "step": 5103 + }, + { + "epoch": 4.08, + "learning_rate": 3.6683738796414854e-05, + "loss": 0.5847, + "step": 5104 + }, + { + "epoch": 4.08, + "learning_rate": 3.665172855313701e-05, + "loss": 0.665, + "step": 5105 + }, + { + "epoch": 4.08, + "learning_rate": 3.661971830985916e-05, + "loss": 0.7021, + "step": 5106 + }, + { + "epoch": 4.09, + "learning_rate": 3.658770806658131e-05, + "loss": 0.7169, + "step": 5107 + }, + { + "epoch": 4.09, + "learning_rate": 3.655569782330346e-05, + "loss": 0.6456, + "step": 5108 + }, + { + "epoch": 4.09, + "learning_rate": 3.652368758002561e-05, + "loss": 0.7031, + "step": 5109 + }, + { + "epoch": 4.09, + "learning_rate": 3.649167733674776e-05, + "loss": 0.6613, + "step": 5110 + }, + { + "epoch": 4.09, + "learning_rate": 3.645966709346991e-05, + "loss": 0.6408, + "step": 5111 + }, + { + "epoch": 4.09, + "learning_rate": 3.6427656850192065e-05, + "loss": 0.5417, + "step": 5112 + }, + { + "epoch": 4.09, + "learning_rate": 3.639564660691421e-05, + "loss": 0.6205, + "step": 5113 + }, + { + "epoch": 4.09, + "learning_rate": 3.6363636363636364e-05, + "loss": 0.6498, + "step": 5114 + }, + { + "epoch": 4.09, + "learning_rate": 3.633162612035852e-05, + "loss": 0.6895, + "step": 5115 + }, + { + "epoch": 4.09, + "learning_rate": 3.629961587708067e-05, + "loss": 0.5582, + "step": 5116 + }, + { + "epoch": 4.09, + "learning_rate": 3.626760563380282e-05, + "loss": 0.6216, + "step": 5117 + }, + { + "epoch": 4.09, + "learning_rate": 3.623559539052497e-05, + "loss": 0.6735, + "step": 5118 + }, + { + "epoch": 4.1, + "learning_rate": 3.620358514724712e-05, + "loss": 0.6704, + "step": 5119 + }, + { + "epoch": 4.1, + "learning_rate": 3.6171574903969275e-05, + "loss": 0.575, + "step": 5120 + }, + { + "epoch": 4.1, + "learning_rate": 3.613956466069142e-05, + "loss": 0.6438, + "step": 5121 + }, + { + "epoch": 4.1, + "learning_rate": 3.6107554417413575e-05, + "loss": 0.6656, + "step": 5122 + }, + { + "epoch": 4.1, + "learning_rate": 3.607554417413572e-05, + "loss": 0.5794, + "step": 5123 + }, + { + "epoch": 4.1, + "learning_rate": 3.6043533930857874e-05, + "loss": 0.7146, + "step": 5124 + }, + { + "epoch": 4.1, + "learning_rate": 3.601152368758003e-05, + "loss": 0.5767, + "step": 5125 + }, + { + "epoch": 4.1, + "learning_rate": 3.597951344430217e-05, + "loss": 0.644, + "step": 5126 + }, + { + "epoch": 4.1, + "learning_rate": 3.594750320102433e-05, + "loss": 0.6613, + "step": 5127 + }, + { + "epoch": 4.1, + "learning_rate": 3.5915492957746486e-05, + "loss": 0.714, + "step": 5128 + }, + { + "epoch": 4.1, + "learning_rate": 3.588348271446863e-05, + "loss": 0.7503, + "step": 5129 + }, + { + "epoch": 4.1, + "learning_rate": 3.5851472471190785e-05, + "loss": 0.6493, + "step": 5130 + }, + { + "epoch": 4.1, + "learning_rate": 3.581946222791294e-05, + "loss": 0.748, + "step": 5131 + }, + { + "epoch": 4.11, + "learning_rate": 3.5787451984635084e-05, + "loss": 0.7554, + "step": 5132 + }, + { + "epoch": 4.11, + "learning_rate": 3.575544174135724e-05, + "loss": 0.6935, + "step": 5133 + }, + { + "epoch": 4.11, + "learning_rate": 3.5723431498079384e-05, + "loss": 0.7088, + "step": 5134 + }, + { + "epoch": 4.11, + "learning_rate": 3.569142125480154e-05, + "loss": 0.6163, + "step": 5135 + }, + { + "epoch": 4.11, + "learning_rate": 3.565941101152369e-05, + "loss": 0.7323, + "step": 5136 + }, + { + "epoch": 4.11, + "learning_rate": 3.5627400768245836e-05, + "loss": 0.7044, + "step": 5137 + }, + { + "epoch": 4.11, + "learning_rate": 3.559539052496799e-05, + "loss": 0.6816, + "step": 5138 + }, + { + "epoch": 4.11, + "learning_rate": 3.556338028169014e-05, + "loss": 0.7235, + "step": 5139 + }, + { + "epoch": 4.11, + "learning_rate": 3.5531370038412295e-05, + "loss": 0.6668, + "step": 5140 + }, + { + "epoch": 4.11, + "learning_rate": 3.549935979513445e-05, + "loss": 0.7283, + "step": 5141 + }, + { + "epoch": 4.11, + "learning_rate": 3.5467349551856594e-05, + "loss": 0.6512, + "step": 5142 + }, + { + "epoch": 4.11, + "learning_rate": 3.543533930857875e-05, + "loss": 0.6886, + "step": 5143 + }, + { + "epoch": 4.12, + "learning_rate": 3.54033290653009e-05, + "loss": 0.5714, + "step": 5144 + }, + { + "epoch": 4.12, + "learning_rate": 3.5371318822023046e-05, + "loss": 0.7887, + "step": 5145 + }, + { + "epoch": 4.12, + "learning_rate": 3.53393085787452e-05, + "loss": 0.7931, + "step": 5146 + }, + { + "epoch": 4.12, + "learning_rate": 3.530729833546735e-05, + "loss": 0.7294, + "step": 5147 + }, + { + "epoch": 4.12, + "learning_rate": 3.52752880921895e-05, + "loss": 0.6986, + "step": 5148 + }, + { + "epoch": 4.12, + "learning_rate": 3.524327784891165e-05, + "loss": 0.6901, + "step": 5149 + }, + { + "epoch": 4.12, + "learning_rate": 3.5211267605633805e-05, + "loss": 0.6977, + "step": 5150 + }, + { + "epoch": 4.12, + "learning_rate": 3.517925736235596e-05, + "loss": 0.5632, + "step": 5151 + }, + { + "epoch": 4.12, + "learning_rate": 3.514724711907811e-05, + "loss": 0.6429, + "step": 5152 + }, + { + "epoch": 4.12, + "learning_rate": 3.511523687580026e-05, + "loss": 0.6129, + "step": 5153 + }, + { + "epoch": 4.12, + "learning_rate": 3.508322663252241e-05, + "loss": 0.5522, + "step": 5154 + }, + { + "epoch": 4.12, + "learning_rate": 3.505121638924456e-05, + "loss": 0.6157, + "step": 5155 + }, + { + "epoch": 4.12, + "learning_rate": 3.501920614596671e-05, + "loss": 0.6209, + "step": 5156 + }, + { + "epoch": 4.13, + "learning_rate": 3.498719590268886e-05, + "loss": 0.5335, + "step": 5157 + }, + { + "epoch": 4.13, + "learning_rate": 3.495518565941101e-05, + "loss": 0.842, + "step": 5158 + }, + { + "epoch": 4.13, + "learning_rate": 3.492317541613316e-05, + "loss": 0.606, + "step": 5159 + }, + { + "epoch": 4.13, + "learning_rate": 3.4891165172855314e-05, + "loss": 0.645, + "step": 5160 + }, + { + "epoch": 4.13, + "learning_rate": 3.485915492957747e-05, + "loss": 0.754, + "step": 5161 + }, + { + "epoch": 4.13, + "learning_rate": 3.482714468629962e-05, + "loss": 0.6491, + "step": 5162 + }, + { + "epoch": 4.13, + "learning_rate": 3.4795134443021773e-05, + "loss": 0.712, + "step": 5163 + }, + { + "epoch": 4.13, + "learning_rate": 3.476312419974392e-05, + "loss": 0.6288, + "step": 5164 + }, + { + "epoch": 4.13, + "learning_rate": 3.473111395646607e-05, + "loss": 0.6678, + "step": 5165 + }, + { + "epoch": 4.13, + "learning_rate": 3.4699103713188226e-05, + "loss": 0.692, + "step": 5166 + }, + { + "epoch": 4.13, + "learning_rate": 3.466709346991037e-05, + "loss": 0.7684, + "step": 5167 + }, + { + "epoch": 4.13, + "learning_rate": 3.4635083226632525e-05, + "loss": 0.6433, + "step": 5168 + }, + { + "epoch": 4.14, + "learning_rate": 3.460307298335467e-05, + "loss": 0.6717, + "step": 5169 + }, + { + "epoch": 4.14, + "learning_rate": 3.4571062740076824e-05, + "loss": 0.6177, + "step": 5170 + }, + { + "epoch": 4.14, + "learning_rate": 3.453905249679898e-05, + "loss": 0.5983, + "step": 5171 + }, + { + "epoch": 4.14, + "learning_rate": 3.450704225352113e-05, + "loss": 0.6657, + "step": 5172 + }, + { + "epoch": 4.14, + "learning_rate": 3.447503201024328e-05, + "loss": 0.6766, + "step": 5173 + }, + { + "epoch": 4.14, + "learning_rate": 3.4443021766965436e-05, + "loss": 0.7164, + "step": 5174 + }, + { + "epoch": 4.14, + "learning_rate": 3.441101152368758e-05, + "loss": 0.69, + "step": 5175 + }, + { + "epoch": 4.14, + "learning_rate": 3.4379001280409735e-05, + "loss": 0.6156, + "step": 5176 + }, + { + "epoch": 4.14, + "learning_rate": 3.434699103713188e-05, + "loss": 0.737, + "step": 5177 + }, + { + "epoch": 4.14, + "learning_rate": 3.4314980793854035e-05, + "loss": 0.6852, + "step": 5178 + }, + { + "epoch": 4.14, + "learning_rate": 3.428297055057619e-05, + "loss": 0.673, + "step": 5179 + }, + { + "epoch": 4.14, + "learning_rate": 3.4250960307298334e-05, + "loss": 0.8903, + "step": 5180 + }, + { + "epoch": 4.14, + "learning_rate": 3.421895006402049e-05, + "loss": 0.5664, + "step": 5181 + }, + { + "epoch": 4.15, + "learning_rate": 3.418693982074264e-05, + "loss": 0.6454, + "step": 5182 + }, + { + "epoch": 4.15, + "learning_rate": 3.4154929577464786e-05, + "loss": 0.5052, + "step": 5183 + }, + { + "epoch": 4.15, + "learning_rate": 3.412291933418694e-05, + "loss": 0.6183, + "step": 5184 + }, + { + "epoch": 4.15, + "learning_rate": 3.409090909090909e-05, + "loss": 0.7045, + "step": 5185 + }, + { + "epoch": 4.15, + "learning_rate": 3.4058898847631245e-05, + "loss": 0.6927, + "step": 5186 + }, + { + "epoch": 4.15, + "learning_rate": 3.40268886043534e-05, + "loss": 0.6974, + "step": 5187 + }, + { + "epoch": 4.15, + "learning_rate": 3.3994878361075544e-05, + "loss": 0.6301, + "step": 5188 + }, + { + "epoch": 4.15, + "learning_rate": 3.39628681177977e-05, + "loss": 0.698, + "step": 5189 + }, + { + "epoch": 4.15, + "learning_rate": 3.393085787451985e-05, + "loss": 0.6259, + "step": 5190 + }, + { + "epoch": 4.15, + "learning_rate": 3.3898847631242e-05, + "loss": 0.6823, + "step": 5191 + }, + { + "epoch": 4.15, + "learning_rate": 3.386683738796415e-05, + "loss": 0.656, + "step": 5192 + }, + { + "epoch": 4.15, + "learning_rate": 3.3834827144686296e-05, + "loss": 0.7861, + "step": 5193 + }, + { + "epoch": 4.16, + "learning_rate": 3.380281690140845e-05, + "loss": 0.544, + "step": 5194 + }, + { + "epoch": 4.16, + "learning_rate": 3.37708066581306e-05, + "loss": 0.586, + "step": 5195 + }, + { + "epoch": 4.16, + "learning_rate": 3.3738796414852755e-05, + "loss": 0.724, + "step": 5196 + }, + { + "epoch": 4.16, + "learning_rate": 3.370678617157491e-05, + "loss": 0.7102, + "step": 5197 + }, + { + "epoch": 4.16, + "learning_rate": 3.367477592829706e-05, + "loss": 0.7212, + "step": 5198 + }, + { + "epoch": 4.16, + "learning_rate": 3.364276568501921e-05, + "loss": 0.6213, + "step": 5199 + }, + { + "epoch": 4.16, + "learning_rate": 3.361075544174136e-05, + "loss": 0.6506, + "step": 5200 + }, + { + "epoch": 4.16, + "learning_rate": 3.357874519846351e-05, + "loss": 0.6582, + "step": 5201 + }, + { + "epoch": 4.16, + "learning_rate": 3.354673495518566e-05, + "loss": 0.6426, + "step": 5202 + }, + { + "epoch": 4.16, + "learning_rate": 3.351472471190781e-05, + "loss": 0.6255, + "step": 5203 + }, + { + "epoch": 4.16, + "learning_rate": 3.348271446862996e-05, + "loss": 0.6057, + "step": 5204 + }, + { + "epoch": 4.16, + "learning_rate": 3.345070422535211e-05, + "loss": 0.6264, + "step": 5205 + }, + { + "epoch": 4.16, + "learning_rate": 3.3418693982074265e-05, + "loss": 0.688, + "step": 5206 + }, + { + "epoch": 4.17, + "learning_rate": 3.338668373879642e-05, + "loss": 0.6414, + "step": 5207 + }, + { + "epoch": 4.17, + "learning_rate": 3.335467349551857e-05, + "loss": 0.7045, + "step": 5208 + }, + { + "epoch": 4.17, + "learning_rate": 3.3322663252240724e-05, + "loss": 0.6398, + "step": 5209 + }, + { + "epoch": 4.17, + "learning_rate": 3.329065300896287e-05, + "loss": 0.6006, + "step": 5210 + }, + { + "epoch": 4.17, + "learning_rate": 3.325864276568502e-05, + "loss": 0.801, + "step": 5211 + }, + { + "epoch": 4.17, + "learning_rate": 3.322663252240717e-05, + "loss": 0.6635, + "step": 5212 + }, + { + "epoch": 4.17, + "learning_rate": 3.319462227912932e-05, + "loss": 0.6328, + "step": 5213 + }, + { + "epoch": 4.17, + "learning_rate": 3.3162612035851475e-05, + "loss": 0.6821, + "step": 5214 + }, + { + "epoch": 4.17, + "learning_rate": 3.313060179257362e-05, + "loss": 0.598, + "step": 5215 + }, + { + "epoch": 4.17, + "learning_rate": 3.3098591549295775e-05, + "loss": 0.6764, + "step": 5216 + }, + { + "epoch": 4.17, + "learning_rate": 3.306658130601793e-05, + "loss": 0.6303, + "step": 5217 + }, + { + "epoch": 4.17, + "learning_rate": 3.303457106274008e-05, + "loss": 0.72, + "step": 5218 + }, + { + "epoch": 4.18, + "learning_rate": 3.3002560819462234e-05, + "loss": 0.6282, + "step": 5219 + }, + { + "epoch": 4.18, + "learning_rate": 3.297055057618438e-05, + "loss": 0.7607, + "step": 5220 + }, + { + "epoch": 4.18, + "learning_rate": 3.293854033290653e-05, + "loss": 0.559, + "step": 5221 + }, + { + "epoch": 4.18, + "learning_rate": 3.2906530089628686e-05, + "loss": 0.7558, + "step": 5222 + }, + { + "epoch": 4.18, + "learning_rate": 3.287451984635083e-05, + "loss": 0.6373, + "step": 5223 + }, + { + "epoch": 4.18, + "learning_rate": 3.2842509603072985e-05, + "loss": 0.6427, + "step": 5224 + }, + { + "epoch": 4.18, + "learning_rate": 3.281049935979514e-05, + "loss": 0.7338, + "step": 5225 + }, + { + "epoch": 4.18, + "learning_rate": 3.2778489116517284e-05, + "loss": 0.7189, + "step": 5226 + }, + { + "epoch": 4.18, + "learning_rate": 3.274647887323944e-05, + "loss": 0.6977, + "step": 5227 + }, + { + "epoch": 4.18, + "learning_rate": 3.2714468629961584e-05, + "loss": 0.739, + "step": 5228 + }, + { + "epoch": 4.18, + "learning_rate": 3.2682458386683737e-05, + "loss": 0.6425, + "step": 5229 + }, + { + "epoch": 4.18, + "learning_rate": 3.265044814340589e-05, + "loss": 0.6482, + "step": 5230 + }, + { + "epoch": 4.18, + "learning_rate": 3.261843790012804e-05, + "loss": 0.6759, + "step": 5231 + }, + { + "epoch": 4.19, + "learning_rate": 3.2586427656850196e-05, + "loss": 0.6177, + "step": 5232 + }, + { + "epoch": 4.19, + "learning_rate": 3.255441741357235e-05, + "loss": 0.7699, + "step": 5233 + }, + { + "epoch": 4.19, + "learning_rate": 3.2522407170294495e-05, + "loss": 0.5653, + "step": 5234 + }, + { + "epoch": 4.19, + "learning_rate": 3.249039692701665e-05, + "loss": 0.725, + "step": 5235 + }, + { + "epoch": 4.19, + "learning_rate": 3.24583866837388e-05, + "loss": 0.6683, + "step": 5236 + }, + { + "epoch": 4.19, + "learning_rate": 3.242637644046095e-05, + "loss": 0.6399, + "step": 5237 + }, + { + "epoch": 4.19, + "learning_rate": 3.23943661971831e-05, + "loss": 0.6915, + "step": 5238 + }, + { + "epoch": 4.19, + "learning_rate": 3.2362355953905246e-05, + "loss": 0.6994, + "step": 5239 + }, + { + "epoch": 4.19, + "learning_rate": 3.23303457106274e-05, + "loss": 0.6228, + "step": 5240 + }, + { + "epoch": 4.19, + "learning_rate": 3.229833546734955e-05, + "loss": 0.668, + "step": 5241 + }, + { + "epoch": 4.19, + "learning_rate": 3.2266325224071705e-05, + "loss": 0.6997, + "step": 5242 + }, + { + "epoch": 4.19, + "learning_rate": 3.223431498079386e-05, + "loss": 0.6011, + "step": 5243 + }, + { + "epoch": 4.2, + "learning_rate": 3.220230473751601e-05, + "loss": 0.7141, + "step": 5244 + }, + { + "epoch": 4.2, + "learning_rate": 3.217029449423816e-05, + "loss": 0.6852, + "step": 5245 + }, + { + "epoch": 4.2, + "learning_rate": 3.213828425096031e-05, + "loss": 0.7747, + "step": 5246 + }, + { + "epoch": 4.2, + "learning_rate": 3.210627400768246e-05, + "loss": 0.672, + "step": 5247 + }, + { + "epoch": 4.2, + "learning_rate": 3.207426376440461e-05, + "loss": 0.6178, + "step": 5248 + }, + { + "epoch": 4.2, + "learning_rate": 3.204225352112676e-05, + "loss": 0.6686, + "step": 5249 + }, + { + "epoch": 4.2, + "learning_rate": 3.201024327784891e-05, + "loss": 0.7281, + "step": 5250 + }, + { + "epoch": 4.2, + "learning_rate": 3.197823303457106e-05, + "loss": 0.5741, + "step": 5251 + }, + { + "epoch": 4.2, + "learning_rate": 3.1946222791293215e-05, + "loss": 0.5512, + "step": 5252 + }, + { + "epoch": 4.2, + "learning_rate": 3.191421254801537e-05, + "loss": 0.7622, + "step": 5253 + }, + { + "epoch": 4.2, + "learning_rate": 3.188220230473752e-05, + "loss": 0.6493, + "step": 5254 + }, + { + "epoch": 4.2, + "learning_rate": 3.185019206145967e-05, + "loss": 0.605, + "step": 5255 + }, + { + "epoch": 4.2, + "learning_rate": 3.181818181818182e-05, + "loss": 0.6595, + "step": 5256 + }, + { + "epoch": 4.21, + "learning_rate": 3.178617157490397e-05, + "loss": 0.6204, + "step": 5257 + }, + { + "epoch": 4.21, + "learning_rate": 3.175416133162612e-05, + "loss": 0.623, + "step": 5258 + }, + { + "epoch": 4.21, + "learning_rate": 3.172215108834827e-05, + "loss": 0.5982, + "step": 5259 + }, + { + "epoch": 4.21, + "learning_rate": 3.1690140845070426e-05, + "loss": 0.6751, + "step": 5260 + }, + { + "epoch": 4.21, + "learning_rate": 3.165813060179257e-05, + "loss": 0.5153, + "step": 5261 + }, + { + "epoch": 4.21, + "learning_rate": 3.1626120358514725e-05, + "loss": 0.63, + "step": 5262 + }, + { + "epoch": 4.21, + "learning_rate": 3.159411011523688e-05, + "loss": 0.65, + "step": 5263 + }, + { + "epoch": 4.21, + "learning_rate": 3.156209987195903e-05, + "loss": 0.6807, + "step": 5264 + }, + { + "epoch": 4.21, + "learning_rate": 3.1530089628681184e-05, + "loss": 0.6238, + "step": 5265 + }, + { + "epoch": 4.21, + "learning_rate": 3.149807938540333e-05, + "loss": 0.6202, + "step": 5266 + }, + { + "epoch": 4.21, + "learning_rate": 3.146606914212548e-05, + "loss": 0.6171, + "step": 5267 + }, + { + "epoch": 4.21, + "learning_rate": 3.1434058898847636e-05, + "loss": 0.6467, + "step": 5268 + }, + { + "epoch": 4.22, + "learning_rate": 3.140204865556978e-05, + "loss": 0.6194, + "step": 5269 + }, + { + "epoch": 4.22, + "learning_rate": 3.1370038412291935e-05, + "loss": 0.7074, + "step": 5270 + }, + { + "epoch": 4.22, + "learning_rate": 3.133802816901409e-05, + "loss": 0.7263, + "step": 5271 + }, + { + "epoch": 4.22, + "learning_rate": 3.1306017925736235e-05, + "loss": 0.6102, + "step": 5272 + }, + { + "epoch": 4.22, + "learning_rate": 3.127400768245839e-05, + "loss": 0.7727, + "step": 5273 + }, + { + "epoch": 4.22, + "learning_rate": 3.1241997439180534e-05, + "loss": 0.6285, + "step": 5274 + }, + { + "epoch": 4.22, + "learning_rate": 3.120998719590269e-05, + "loss": 0.7517, + "step": 5275 + }, + { + "epoch": 4.22, + "learning_rate": 3.117797695262485e-05, + "loss": 0.7618, + "step": 5276 + }, + { + "epoch": 4.22, + "learning_rate": 3.114596670934699e-05, + "loss": 0.5989, + "step": 5277 + }, + { + "epoch": 4.22, + "learning_rate": 3.1113956466069146e-05, + "loss": 0.6422, + "step": 5278 + }, + { + "epoch": 4.22, + "learning_rate": 3.10819462227913e-05, + "loss": 0.6629, + "step": 5279 + }, + { + "epoch": 4.22, + "learning_rate": 3.1049935979513445e-05, + "loss": 0.6521, + "step": 5280 + }, + { + "epoch": 4.22, + "learning_rate": 3.10179257362356e-05, + "loss": 0.7657, + "step": 5281 + }, + { + "epoch": 4.23, + "learning_rate": 3.0985915492957744e-05, + "loss": 0.5988, + "step": 5282 + }, + { + "epoch": 4.23, + "learning_rate": 3.09539052496799e-05, + "loss": 0.753, + "step": 5283 + }, + { + "epoch": 4.23, + "learning_rate": 3.092189500640205e-05, + "loss": 0.655, + "step": 5284 + }, + { + "epoch": 4.23, + "learning_rate": 3.0889884763124197e-05, + "loss": 0.5489, + "step": 5285 + }, + { + "epoch": 4.23, + "learning_rate": 3.085787451984635e-05, + "loss": 0.7285, + "step": 5286 + }, + { + "epoch": 4.23, + "learning_rate": 3.08258642765685e-05, + "loss": 0.6484, + "step": 5287 + }, + { + "epoch": 4.23, + "learning_rate": 3.0793854033290656e-05, + "loss": 0.5359, + "step": 5288 + }, + { + "epoch": 4.23, + "learning_rate": 3.076184379001281e-05, + "loss": 0.6728, + "step": 5289 + }, + { + "epoch": 4.23, + "learning_rate": 3.0729833546734955e-05, + "loss": 0.6176, + "step": 5290 + }, + { + "epoch": 4.23, + "learning_rate": 3.069782330345711e-05, + "loss": 0.5841, + "step": 5291 + }, + { + "epoch": 4.23, + "learning_rate": 3.066581306017926e-05, + "loss": 0.5999, + "step": 5292 + }, + { + "epoch": 4.23, + "learning_rate": 3.063380281690141e-05, + "loss": 0.5406, + "step": 5293 + }, + { + "epoch": 4.24, + "learning_rate": 3.060179257362356e-05, + "loss": 0.6608, + "step": 5294 + }, + { + "epoch": 4.24, + "learning_rate": 3.056978233034571e-05, + "loss": 0.6487, + "step": 5295 + }, + { + "epoch": 4.24, + "learning_rate": 3.053777208706786e-05, + "loss": 0.5892, + "step": 5296 + }, + { + "epoch": 4.24, + "learning_rate": 3.0505761843790016e-05, + "loss": 0.6522, + "step": 5297 + }, + { + "epoch": 4.24, + "learning_rate": 3.0473751600512162e-05, + "loss": 0.6607, + "step": 5298 + }, + { + "epoch": 4.24, + "learning_rate": 3.0441741357234315e-05, + "loss": 0.6999, + "step": 5299 + }, + { + "epoch": 4.24, + "learning_rate": 3.0409731113956468e-05, + "loss": 0.6472, + "step": 5300 + }, + { + "epoch": 4.24, + "learning_rate": 3.0377720870678618e-05, + "loss": 0.6596, + "step": 5301 + }, + { + "epoch": 4.24, + "learning_rate": 3.034571062740077e-05, + "loss": 0.5675, + "step": 5302 + }, + { + "epoch": 4.24, + "learning_rate": 3.0313700384122924e-05, + "loss": 0.5999, + "step": 5303 + }, + { + "epoch": 4.24, + "learning_rate": 3.028169014084507e-05, + "loss": 0.7481, + "step": 5304 + }, + { + "epoch": 4.24, + "learning_rate": 3.0249679897567223e-05, + "loss": 0.7533, + "step": 5305 + }, + { + "epoch": 4.24, + "learning_rate": 3.0217669654289376e-05, + "loss": 0.5903, + "step": 5306 + }, + { + "epoch": 4.25, + "learning_rate": 3.0185659411011526e-05, + "loss": 0.6855, + "step": 5307 + }, + { + "epoch": 4.25, + "learning_rate": 3.015364916773368e-05, + "loss": 0.7491, + "step": 5308 + }, + { + "epoch": 4.25, + "learning_rate": 3.0121638924455825e-05, + "loss": 0.6039, + "step": 5309 + }, + { + "epoch": 4.25, + "learning_rate": 3.0089628681177978e-05, + "loss": 0.6281, + "step": 5310 + }, + { + "epoch": 4.25, + "learning_rate": 3.005761843790013e-05, + "loss": 0.6453, + "step": 5311 + }, + { + "epoch": 4.25, + "learning_rate": 3.002560819462228e-05, + "loss": 0.6134, + "step": 5312 + }, + { + "epoch": 4.25, + "learning_rate": 2.9993597951344433e-05, + "loss": 0.647, + "step": 5313 + }, + { + "epoch": 4.25, + "learning_rate": 2.9961587708066586e-05, + "loss": 0.7176, + "step": 5314 + }, + { + "epoch": 4.25, + "learning_rate": 2.9929577464788733e-05, + "loss": 0.5648, + "step": 5315 + }, + { + "epoch": 4.25, + "learning_rate": 2.9897567221510886e-05, + "loss": 0.688, + "step": 5316 + }, + { + "epoch": 4.25, + "learning_rate": 2.9865556978233032e-05, + "loss": 0.6736, + "step": 5317 + }, + { + "epoch": 4.25, + "learning_rate": 2.9833546734955188e-05, + "loss": 0.7056, + "step": 5318 + }, + { + "epoch": 4.26, + "learning_rate": 2.980153649167734e-05, + "loss": 0.7305, + "step": 5319 + }, + { + "epoch": 4.26, + "learning_rate": 2.9769526248399488e-05, + "loss": 0.5886, + "step": 5320 + }, + { + "epoch": 4.26, + "learning_rate": 2.973751600512164e-05, + "loss": 0.6955, + "step": 5321 + }, + { + "epoch": 4.26, + "learning_rate": 2.9705505761843794e-05, + "loss": 0.6499, + "step": 5322 + }, + { + "epoch": 4.26, + "learning_rate": 2.967349551856594e-05, + "loss": 0.6834, + "step": 5323 + }, + { + "epoch": 4.26, + "learning_rate": 2.9641485275288096e-05, + "loss": 0.6474, + "step": 5324 + }, + { + "epoch": 4.26, + "learning_rate": 2.9609475032010242e-05, + "loss": 0.5837, + "step": 5325 + }, + { + "epoch": 4.26, + "learning_rate": 2.9577464788732395e-05, + "loss": 0.6491, + "step": 5326 + }, + { + "epoch": 4.26, + "learning_rate": 2.954545454545455e-05, + "loss": 0.621, + "step": 5327 + }, + { + "epoch": 4.26, + "learning_rate": 2.9513444302176695e-05, + "loss": 0.7181, + "step": 5328 + }, + { + "epoch": 4.26, + "learning_rate": 2.9481434058898848e-05, + "loss": 0.6588, + "step": 5329 + }, + { + "epoch": 4.26, + "learning_rate": 2.9449423815621e-05, + "loss": 0.7559, + "step": 5330 + }, + { + "epoch": 4.26, + "learning_rate": 2.941741357234315e-05, + "loss": 0.7544, + "step": 5331 + }, + { + "epoch": 4.27, + "learning_rate": 2.9385403329065303e-05, + "loss": 0.5908, + "step": 5332 + }, + { + "epoch": 4.27, + "learning_rate": 2.935339308578745e-05, + "loss": 0.7422, + "step": 5333 + }, + { + "epoch": 4.27, + "learning_rate": 2.9321382842509603e-05, + "loss": 0.6433, + "step": 5334 + }, + { + "epoch": 4.27, + "learning_rate": 2.9289372599231756e-05, + "loss": 0.6804, + "step": 5335 + }, + { + "epoch": 4.27, + "learning_rate": 2.9257362355953905e-05, + "loss": 0.7066, + "step": 5336 + }, + { + "epoch": 4.27, + "learning_rate": 2.9225352112676058e-05, + "loss": 0.6117, + "step": 5337 + }, + { + "epoch": 4.27, + "learning_rate": 2.919334186939821e-05, + "loss": 0.7083, + "step": 5338 + }, + { + "epoch": 4.27, + "learning_rate": 2.9161331626120357e-05, + "loss": 0.6518, + "step": 5339 + }, + { + "epoch": 4.27, + "learning_rate": 2.912932138284251e-05, + "loss": 0.5896, + "step": 5340 + }, + { + "epoch": 4.27, + "learning_rate": 2.9097311139564663e-05, + "loss": 0.613, + "step": 5341 + }, + { + "epoch": 4.27, + "learning_rate": 2.9065300896286813e-05, + "loss": 0.7438, + "step": 5342 + }, + { + "epoch": 4.27, + "learning_rate": 2.9033290653008966e-05, + "loss": 0.6183, + "step": 5343 + }, + { + "epoch": 4.28, + "learning_rate": 2.9001280409731112e-05, + "loss": 0.6559, + "step": 5344 + }, + { + "epoch": 4.28, + "learning_rate": 2.8969270166453265e-05, + "loss": 0.5662, + "step": 5345 + }, + { + "epoch": 4.28, + "learning_rate": 2.8937259923175418e-05, + "loss": 0.6956, + "step": 5346 + }, + { + "epoch": 4.28, + "learning_rate": 2.8905249679897568e-05, + "loss": 0.6096, + "step": 5347 + }, + { + "epoch": 4.28, + "learning_rate": 2.887323943661972e-05, + "loss": 0.6928, + "step": 5348 + }, + { + "epoch": 4.28, + "learning_rate": 2.8841229193341874e-05, + "loss": 0.526, + "step": 5349 + }, + { + "epoch": 4.28, + "learning_rate": 2.880921895006402e-05, + "loss": 0.6964, + "step": 5350 + }, + { + "epoch": 4.28, + "learning_rate": 2.8777208706786173e-05, + "loss": 0.7463, + "step": 5351 + }, + { + "epoch": 4.28, + "learning_rate": 2.8745198463508323e-05, + "loss": 0.7229, + "step": 5352 + }, + { + "epoch": 4.28, + "learning_rate": 2.8713188220230476e-05, + "loss": 0.7143, + "step": 5353 + }, + { + "epoch": 4.28, + "learning_rate": 2.868117797695263e-05, + "loss": 0.6582, + "step": 5354 + }, + { + "epoch": 4.28, + "learning_rate": 2.8649167733674775e-05, + "loss": 0.6198, + "step": 5355 + }, + { + "epoch": 4.28, + "learning_rate": 2.8617157490396928e-05, + "loss": 0.5727, + "step": 5356 + }, + { + "epoch": 4.29, + "learning_rate": 2.858514724711908e-05, + "loss": 0.6859, + "step": 5357 + }, + { + "epoch": 4.29, + "learning_rate": 2.855313700384123e-05, + "loss": 0.6665, + "step": 5358 + }, + { + "epoch": 4.29, + "learning_rate": 2.8521126760563384e-05, + "loss": 0.5731, + "step": 5359 + }, + { + "epoch": 4.29, + "learning_rate": 2.848911651728553e-05, + "loss": 0.7578, + "step": 5360 + }, + { + "epoch": 4.29, + "learning_rate": 2.8457106274007683e-05, + "loss": 0.6387, + "step": 5361 + }, + { + "epoch": 4.29, + "learning_rate": 2.8425096030729836e-05, + "loss": 0.6575, + "step": 5362 + }, + { + "epoch": 4.29, + "learning_rate": 2.8393085787451986e-05, + "loss": 0.621, + "step": 5363 + }, + { + "epoch": 4.29, + "learning_rate": 2.836107554417414e-05, + "loss": 0.7882, + "step": 5364 + }, + { + "epoch": 4.29, + "learning_rate": 2.832906530089629e-05, + "loss": 0.7239, + "step": 5365 + }, + { + "epoch": 4.29, + "learning_rate": 2.8297055057618438e-05, + "loss": 0.5955, + "step": 5366 + }, + { + "epoch": 4.29, + "learning_rate": 2.826504481434059e-05, + "loss": 0.8315, + "step": 5367 + }, + { + "epoch": 4.29, + "learning_rate": 2.8233034571062744e-05, + "loss": 0.7171, + "step": 5368 + }, + { + "epoch": 4.3, + "learning_rate": 2.8201024327784893e-05, + "loss": 0.603, + "step": 5369 + }, + { + "epoch": 4.3, + "learning_rate": 2.8169014084507046e-05, + "loss": 0.6795, + "step": 5370 + }, + { + "epoch": 4.3, + "learning_rate": 2.8137003841229193e-05, + "loss": 0.5604, + "step": 5371 + }, + { + "epoch": 4.3, + "learning_rate": 2.8104993597951346e-05, + "loss": 0.6585, + "step": 5372 + }, + { + "epoch": 4.3, + "learning_rate": 2.80729833546735e-05, + "loss": 0.6368, + "step": 5373 + }, + { + "epoch": 4.3, + "learning_rate": 2.8040973111395645e-05, + "loss": 0.6519, + "step": 5374 + }, + { + "epoch": 4.3, + "learning_rate": 2.8008962868117798e-05, + "loss": 0.7036, + "step": 5375 + }, + { + "epoch": 4.3, + "learning_rate": 2.7976952624839954e-05, + "loss": 0.7992, + "step": 5376 + }, + { + "epoch": 4.3, + "learning_rate": 2.79449423815621e-05, + "loss": 0.5695, + "step": 5377 + }, + { + "epoch": 4.3, + "learning_rate": 2.7912932138284254e-05, + "loss": 0.7711, + "step": 5378 + }, + { + "epoch": 4.3, + "learning_rate": 2.78809218950064e-05, + "loss": 0.6557, + "step": 5379 + }, + { + "epoch": 4.3, + "learning_rate": 2.7848911651728553e-05, + "loss": 0.7017, + "step": 5380 + }, + { + "epoch": 4.3, + "learning_rate": 2.7816901408450706e-05, + "loss": 0.6468, + "step": 5381 + }, + { + "epoch": 4.31, + "learning_rate": 2.7784891165172855e-05, + "loss": 0.6789, + "step": 5382 + }, + { + "epoch": 4.31, + "learning_rate": 2.775288092189501e-05, + "loss": 0.5784, + "step": 5383 + }, + { + "epoch": 4.31, + "learning_rate": 2.772087067861716e-05, + "loss": 0.6075, + "step": 5384 + }, + { + "epoch": 4.31, + "learning_rate": 2.7688860435339308e-05, + "loss": 0.6885, + "step": 5385 + }, + { + "epoch": 4.31, + "learning_rate": 2.765685019206146e-05, + "loss": 0.6508, + "step": 5386 + }, + { + "epoch": 4.31, + "learning_rate": 2.762483994878361e-05, + "loss": 0.6331, + "step": 5387 + }, + { + "epoch": 4.31, + "learning_rate": 2.7592829705505763e-05, + "loss": 0.5895, + "step": 5388 + }, + { + "epoch": 4.31, + "learning_rate": 2.7560819462227916e-05, + "loss": 0.7672, + "step": 5389 + }, + { + "epoch": 4.31, + "learning_rate": 2.7528809218950063e-05, + "loss": 0.6117, + "step": 5390 + }, + { + "epoch": 4.31, + "learning_rate": 2.7496798975672216e-05, + "loss": 0.7022, + "step": 5391 + }, + { + "epoch": 4.31, + "learning_rate": 2.746478873239437e-05, + "loss": 0.7279, + "step": 5392 + }, + { + "epoch": 4.31, + "learning_rate": 2.7432778489116518e-05, + "loss": 0.7343, + "step": 5393 + }, + { + "epoch": 4.32, + "learning_rate": 2.740076824583867e-05, + "loss": 0.6588, + "step": 5394 + }, + { + "epoch": 4.32, + "learning_rate": 2.7368758002560817e-05, + "loss": 0.5883, + "step": 5395 + }, + { + "epoch": 4.32, + "learning_rate": 2.733674775928297e-05, + "loss": 0.6211, + "step": 5396 + }, + { + "epoch": 4.32, + "learning_rate": 2.7304737516005123e-05, + "loss": 0.6284, + "step": 5397 + }, + { + "epoch": 4.32, + "learning_rate": 2.7272727272727273e-05, + "loss": 0.6185, + "step": 5398 + }, + { + "epoch": 4.32, + "learning_rate": 2.7240717029449426e-05, + "loss": 0.712, + "step": 5399 + }, + { + "epoch": 4.32, + "learning_rate": 2.720870678617158e-05, + "loss": 0.7412, + "step": 5400 + }, + { + "epoch": 4.32, + "learning_rate": 2.7176696542893725e-05, + "loss": 0.612, + "step": 5401 + }, + { + "epoch": 4.32, + "learning_rate": 2.714468629961588e-05, + "loss": 0.5962, + "step": 5402 + }, + { + "epoch": 4.32, + "learning_rate": 2.711267605633803e-05, + "loss": 0.562, + "step": 5403 + }, + { + "epoch": 4.32, + "learning_rate": 2.708066581306018e-05, + "loss": 0.7743, + "step": 5404 + }, + { + "epoch": 4.32, + "learning_rate": 2.7048655569782334e-05, + "loss": 0.5637, + "step": 5405 + }, + { + "epoch": 4.32, + "learning_rate": 2.701664532650448e-05, + "loss": 0.6334, + "step": 5406 + }, + { + "epoch": 4.33, + "learning_rate": 2.6984635083226633e-05, + "loss": 0.7589, + "step": 5407 + }, + { + "epoch": 4.33, + "learning_rate": 2.6952624839948786e-05, + "loss": 0.6662, + "step": 5408 + }, + { + "epoch": 4.33, + "learning_rate": 2.6920614596670936e-05, + "loss": 0.5661, + "step": 5409 + }, + { + "epoch": 4.33, + "learning_rate": 2.688860435339309e-05, + "loss": 0.5992, + "step": 5410 + }, + { + "epoch": 4.33, + "learning_rate": 2.6856594110115242e-05, + "loss": 0.5246, + "step": 5411 + }, + { + "epoch": 4.33, + "learning_rate": 2.6824583866837388e-05, + "loss": 0.702, + "step": 5412 + }, + { + "epoch": 4.33, + "learning_rate": 2.679257362355954e-05, + "loss": 0.7051, + "step": 5413 + }, + { + "epoch": 4.33, + "learning_rate": 2.676056338028169e-05, + "loss": 0.6433, + "step": 5414 + }, + { + "epoch": 4.33, + "learning_rate": 2.6728553137003844e-05, + "loss": 0.7156, + "step": 5415 + }, + { + "epoch": 4.33, + "learning_rate": 2.6696542893725997e-05, + "loss": 0.6018, + "step": 5416 + }, + { + "epoch": 4.33, + "learning_rate": 2.6664532650448143e-05, + "loss": 0.6194, + "step": 5417 + }, + { + "epoch": 4.33, + "learning_rate": 2.6632522407170296e-05, + "loss": 0.7008, + "step": 5418 + }, + { + "epoch": 4.34, + "learning_rate": 2.660051216389245e-05, + "loss": 0.6147, + "step": 5419 + }, + { + "epoch": 4.34, + "learning_rate": 2.6568501920614595e-05, + "loss": 0.6932, + "step": 5420 + }, + { + "epoch": 4.34, + "learning_rate": 2.653649167733675e-05, + "loss": 0.6572, + "step": 5421 + }, + { + "epoch": 4.34, + "learning_rate": 2.6504481434058898e-05, + "loss": 0.6893, + "step": 5422 + }, + { + "epoch": 4.34, + "learning_rate": 2.647247119078105e-05, + "loss": 0.5799, + "step": 5423 + }, + { + "epoch": 4.34, + "learning_rate": 2.6440460947503204e-05, + "loss": 0.7251, + "step": 5424 + }, + { + "epoch": 4.34, + "learning_rate": 2.640845070422535e-05, + "loss": 0.6485, + "step": 5425 + }, + { + "epoch": 4.34, + "learning_rate": 2.6376440460947503e-05, + "loss": 0.7895, + "step": 5426 + }, + { + "epoch": 4.34, + "learning_rate": 2.6344430217669656e-05, + "loss": 0.7611, + "step": 5427 + }, + { + "epoch": 4.34, + "learning_rate": 2.6312419974391806e-05, + "loss": 0.5717, + "step": 5428 + }, + { + "epoch": 4.34, + "learning_rate": 2.628040973111396e-05, + "loss": 0.6753, + "step": 5429 + }, + { + "epoch": 4.34, + "learning_rate": 2.6248399487836105e-05, + "loss": 0.651, + "step": 5430 + }, + { + "epoch": 4.34, + "learning_rate": 2.6216389244558258e-05, + "loss": 0.758, + "step": 5431 + }, + { + "epoch": 4.35, + "learning_rate": 2.618437900128041e-05, + "loss": 0.6365, + "step": 5432 + }, + { + "epoch": 4.35, + "learning_rate": 2.615236875800256e-05, + "loss": 0.6632, + "step": 5433 + }, + { + "epoch": 4.35, + "learning_rate": 2.6120358514724714e-05, + "loss": 0.7146, + "step": 5434 + }, + { + "epoch": 4.35, + "learning_rate": 2.6088348271446867e-05, + "loss": 0.7343, + "step": 5435 + }, + { + "epoch": 4.35, + "learning_rate": 2.6056338028169013e-05, + "loss": 0.6725, + "step": 5436 + }, + { + "epoch": 4.35, + "learning_rate": 2.6024327784891166e-05, + "loss": 0.6568, + "step": 5437 + }, + { + "epoch": 4.35, + "learning_rate": 2.599231754161332e-05, + "loss": 0.6921, + "step": 5438 + }, + { + "epoch": 4.35, + "learning_rate": 2.596030729833547e-05, + "loss": 0.6488, + "step": 5439 + }, + { + "epoch": 4.35, + "learning_rate": 2.592829705505762e-05, + "loss": 0.6519, + "step": 5440 + }, + { + "epoch": 4.35, + "learning_rate": 2.5896286811779768e-05, + "loss": 0.5882, + "step": 5441 + }, + { + "epoch": 4.35, + "learning_rate": 2.586427656850192e-05, + "loss": 0.6288, + "step": 5442 + }, + { + "epoch": 4.35, + "learning_rate": 2.5832266325224074e-05, + "loss": 0.6036, + "step": 5443 + }, + { + "epoch": 4.36, + "learning_rate": 2.5800256081946223e-05, + "loss": 0.7171, + "step": 5444 + }, + { + "epoch": 4.36, + "learning_rate": 2.5768245838668376e-05, + "loss": 0.6476, + "step": 5445 + }, + { + "epoch": 4.36, + "learning_rate": 2.573623559539053e-05, + "loss": 0.6696, + "step": 5446 + }, + { + "epoch": 4.36, + "learning_rate": 2.5704225352112676e-05, + "loss": 0.6514, + "step": 5447 + }, + { + "epoch": 4.36, + "learning_rate": 2.567221510883483e-05, + "loss": 0.6642, + "step": 5448 + }, + { + "epoch": 4.36, + "learning_rate": 2.5640204865556978e-05, + "loss": 0.7138, + "step": 5449 + }, + { + "epoch": 4.36, + "learning_rate": 2.560819462227913e-05, + "loss": 0.7086, + "step": 5450 + }, + { + "epoch": 4.36, + "learning_rate": 2.5576184379001284e-05, + "loss": 0.6912, + "step": 5451 + }, + { + "epoch": 4.36, + "learning_rate": 2.554417413572343e-05, + "loss": 0.612, + "step": 5452 + }, + { + "epoch": 4.36, + "learning_rate": 2.5512163892445584e-05, + "loss": 0.6721, + "step": 5453 + }, + { + "epoch": 4.36, + "learning_rate": 2.5480153649167737e-05, + "loss": 0.6158, + "step": 5454 + }, + { + "epoch": 4.36, + "learning_rate": 2.5448143405889886e-05, + "loss": 0.6948, + "step": 5455 + }, + { + "epoch": 4.36, + "learning_rate": 2.541613316261204e-05, + "loss": 0.5848, + "step": 5456 + }, + { + "epoch": 4.37, + "learning_rate": 2.5384122919334185e-05, + "loss": 0.6856, + "step": 5457 + }, + { + "epoch": 4.37, + "learning_rate": 2.535211267605634e-05, + "loss": 0.6318, + "step": 5458 + }, + { + "epoch": 4.37, + "learning_rate": 2.532010243277849e-05, + "loss": 0.649, + "step": 5459 + }, + { + "epoch": 4.37, + "learning_rate": 2.528809218950064e-05, + "loss": 0.7003, + "step": 5460 + }, + { + "epoch": 4.37, + "learning_rate": 2.5256081946222794e-05, + "loss": 0.685, + "step": 5461 + }, + { + "epoch": 4.37, + "learning_rate": 2.5224071702944947e-05, + "loss": 0.6378, + "step": 5462 + }, + { + "epoch": 4.37, + "learning_rate": 2.5192061459667093e-05, + "loss": 0.7063, + "step": 5463 + }, + { + "epoch": 4.37, + "learning_rate": 2.5160051216389246e-05, + "loss": 0.6583, + "step": 5464 + }, + { + "epoch": 4.37, + "learning_rate": 2.5128040973111393e-05, + "loss": 0.6343, + "step": 5465 + }, + { + "epoch": 4.37, + "learning_rate": 2.509603072983355e-05, + "loss": 0.6641, + "step": 5466 + }, + { + "epoch": 4.37, + "learning_rate": 2.5064020486555702e-05, + "loss": 0.7507, + "step": 5467 + }, + { + "epoch": 4.37, + "learning_rate": 2.5032010243277848e-05, + "loss": 0.5453, + "step": 5468 + }, + { + "epoch": 4.38, + "learning_rate": 2.5e-05, + "loss": 0.6481, + "step": 5469 + }, + { + "epoch": 4.38, + "learning_rate": 2.496798975672215e-05, + "loss": 0.6354, + "step": 5470 + }, + { + "epoch": 4.38, + "learning_rate": 2.4935979513444304e-05, + "loss": 0.6671, + "step": 5471 + }, + { + "epoch": 4.38, + "learning_rate": 2.4903969270166453e-05, + "loss": 0.7768, + "step": 5472 + }, + { + "epoch": 4.38, + "learning_rate": 2.4871959026888606e-05, + "loss": 0.542, + "step": 5473 + }, + { + "epoch": 4.38, + "learning_rate": 2.4839948783610756e-05, + "loss": 0.6329, + "step": 5474 + }, + { + "epoch": 4.38, + "learning_rate": 2.480793854033291e-05, + "loss": 0.6544, + "step": 5475 + }, + { + "epoch": 4.38, + "learning_rate": 2.477592829705506e-05, + "loss": 0.6491, + "step": 5476 + }, + { + "epoch": 4.38, + "learning_rate": 2.474391805377721e-05, + "loss": 0.8764, + "step": 5477 + }, + { + "epoch": 4.38, + "learning_rate": 2.471190781049936e-05, + "loss": 0.7096, + "step": 5478 + }, + { + "epoch": 4.38, + "learning_rate": 2.4679897567221514e-05, + "loss": 0.588, + "step": 5479 + }, + { + "epoch": 4.38, + "learning_rate": 2.4647887323943664e-05, + "loss": 0.6934, + "step": 5480 + }, + { + "epoch": 4.38, + "learning_rate": 2.4615877080665814e-05, + "loss": 0.6525, + "step": 5481 + }, + { + "epoch": 4.39, + "learning_rate": 2.4583866837387963e-05, + "loss": 0.6507, + "step": 5482 + }, + { + "epoch": 4.39, + "learning_rate": 2.4551856594110116e-05, + "loss": 0.6907, + "step": 5483 + }, + { + "epoch": 4.39, + "learning_rate": 2.451984635083227e-05, + "loss": 0.7025, + "step": 5484 + }, + { + "epoch": 4.39, + "learning_rate": 2.448783610755442e-05, + "loss": 0.5338, + "step": 5485 + }, + { + "epoch": 4.39, + "learning_rate": 2.445582586427657e-05, + "loss": 0.7106, + "step": 5486 + }, + { + "epoch": 4.39, + "learning_rate": 2.442381562099872e-05, + "loss": 0.6269, + "step": 5487 + }, + { + "epoch": 4.39, + "learning_rate": 2.439180537772087e-05, + "loss": 0.6741, + "step": 5488 + }, + { + "epoch": 4.39, + "learning_rate": 2.4359795134443024e-05, + "loss": 0.7562, + "step": 5489 + }, + { + "epoch": 4.39, + "learning_rate": 2.4327784891165174e-05, + "loss": 0.7614, + "step": 5490 + }, + { + "epoch": 4.39, + "learning_rate": 2.4295774647887327e-05, + "loss": 0.6973, + "step": 5491 + }, + { + "epoch": 4.39, + "learning_rate": 2.4263764404609476e-05, + "loss": 0.6533, + "step": 5492 + }, + { + "epoch": 4.39, + "learning_rate": 2.4231754161331626e-05, + "loss": 0.7386, + "step": 5493 + }, + { + "epoch": 4.4, + "learning_rate": 2.4199743918053776e-05, + "loss": 0.6729, + "step": 5494 + }, + { + "epoch": 4.4, + "learning_rate": 2.4167733674775932e-05, + "loss": 0.5701, + "step": 5495 + }, + { + "epoch": 4.4, + "learning_rate": 2.413572343149808e-05, + "loss": 0.6856, + "step": 5496 + }, + { + "epoch": 4.4, + "learning_rate": 2.410371318822023e-05, + "loss": 0.6357, + "step": 5497 + }, + { + "epoch": 4.4, + "learning_rate": 2.407170294494238e-05, + "loss": 0.7307, + "step": 5498 + }, + { + "epoch": 4.4, + "learning_rate": 2.4039692701664534e-05, + "loss": 0.6799, + "step": 5499 + }, + { + "epoch": 4.4, + "learning_rate": 2.4007682458386683e-05, + "loss": 0.5935, + "step": 5500 + } + ], + "max_steps": 6250, + "num_train_epochs": 5, + "total_flos": 2.156691989232599e+17, + "trial_name": null, + "trial_params": null +}