diff --git "a/checkpoint-9472/trainer_state.json" "b/checkpoint-9472/trainer_state.json" new file mode 100644--- /dev/null +++ "b/checkpoint-9472/trainer_state.json" @@ -0,0 +1,56923 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.9700499168053245, + "eval_steps": 1000, + "global_step": 9472, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 5.859375e-09, + "loss": 1.296, + "step": 1 + }, + { + "epoch": 0.0, + "learning_rate": 2.34375e-08, + "loss": 1.3068, + "step": 2 + }, + { + "epoch": 0.0, + "learning_rate": 5.2734375e-08, + "loss": 1.1979, + "step": 3 + }, + { + "epoch": 0.0, + "learning_rate": 9.375e-08, + "loss": 1.3552, + "step": 4 + }, + { + "epoch": 0.0, + "learning_rate": 1.4648437500000001e-07, + "loss": 1.2051, + "step": 5 + }, + { + "epoch": 0.0, + "learning_rate": 2.109375e-07, + "loss": 1.7044, + "step": 6 + }, + { + "epoch": 0.0, + "learning_rate": 2.87109375e-07, + "loss": 1.4791, + "step": 7 + }, + { + "epoch": 0.0, + "learning_rate": 3.75e-07, + "loss": 1.5419, + "step": 8 + }, + { + "epoch": 0.0, + "learning_rate": 4.74609375e-07, + "loss": 1.332, + "step": 9 + }, + { + "epoch": 0.0, + "learning_rate": 5.859375000000001e-07, + "loss": 1.3585, + "step": 10 + }, + { + "epoch": 0.0, + "learning_rate": 7.08984375e-07, + "loss": 1.3662, + "step": 11 + }, + { + "epoch": 0.0, + "learning_rate": 8.4375e-07, + "loss": 1.1231, + "step": 12 + }, + { + "epoch": 0.0, + "learning_rate": 9.90234375e-07, + "loss": 1.5108, + "step": 13 + }, + { + "epoch": 0.0, + "learning_rate": 1.1484375e-06, + "loss": 1.3973, + "step": 14 + }, + { + "epoch": 0.0, + "learning_rate": 1.318359375e-06, + "loss": 1.2765, + "step": 15 + }, + { + "epoch": 0.0, + "learning_rate": 1.5e-06, + "loss": 1.2937, + "step": 16 + }, + { + "epoch": 0.0, + "learning_rate": 1.693359375e-06, + "loss": 1.4439, + "step": 17 + }, + { + "epoch": 0.0, + "learning_rate": 1.8984375e-06, + "loss": 1.4299, + "step": 18 + }, + { + "epoch": 0.0, + "learning_rate": 2.115234375e-06, + "loss": 1.474, + "step": 19 + }, + { + "epoch": 0.0, + "learning_rate": 2.3437500000000002e-06, + "loss": 1.3522, + "step": 20 + }, + { + "epoch": 0.0, + "learning_rate": 2.583984375e-06, + "loss": 1.5762, + "step": 21 + }, + { + "epoch": 0.0, + "learning_rate": 2.8359375e-06, + "loss": 1.5249, + "step": 22 + }, + { + "epoch": 0.0, + "learning_rate": 3.099609375e-06, + "loss": 1.2834, + "step": 23 + }, + { + "epoch": 0.0, + "learning_rate": 3.375e-06, + "loss": 1.2947, + "step": 24 + }, + { + "epoch": 0.01, + "learning_rate": 3.6621093750000003e-06, + "loss": 1.2467, + "step": 25 + }, + { + "epoch": 0.01, + "learning_rate": 3.9609375e-06, + "loss": 1.3112, + "step": 26 + }, + { + "epoch": 0.01, + "learning_rate": 4.271484375e-06, + "loss": 1.1706, + "step": 27 + }, + { + "epoch": 0.01, + "learning_rate": 4.59375e-06, + "loss": 1.2307, + "step": 28 + }, + { + "epoch": 0.01, + "learning_rate": 4.927734375e-06, + "loss": 1.2279, + "step": 29 + }, + { + "epoch": 0.01, + "learning_rate": 5.2734375e-06, + "loss": 1.4434, + "step": 30 + }, + { + "epoch": 0.01, + "learning_rate": 5.630859375e-06, + "loss": 1.3737, + "step": 31 + }, + { + "epoch": 0.01, + "learning_rate": 6e-06, + "loss": 1.4432, + "step": 32 + }, + { + "epoch": 0.01, + "learning_rate": 5.9999998388251064e-06, + "loss": 1.2401, + "step": 33 + }, + { + "epoch": 0.01, + "learning_rate": 5.999999355300446e-06, + "loss": 1.5202, + "step": 34 + }, + { + "epoch": 0.01, + "learning_rate": 5.999998549426068e-06, + "loss": 1.3696, + "step": 35 + }, + { + "epoch": 0.01, + "learning_rate": 5.999997421202059e-06, + "loss": 1.2896, + "step": 36 + }, + { + "epoch": 0.01, + "learning_rate": 5.999995970628542e-06, + "loss": 1.0738, + "step": 37 + }, + { + "epoch": 0.01, + "learning_rate": 5.999994197705672e-06, + "loss": 1.2536, + "step": 38 + }, + { + "epoch": 0.01, + "learning_rate": 5.999992102433639e-06, + "loss": 1.0722, + "step": 39 + }, + { + "epoch": 0.01, + "learning_rate": 5.999989684812669e-06, + "loss": 1.268, + "step": 40 + }, + { + "epoch": 0.01, + "learning_rate": 5.999986944843022e-06, + "loss": 1.0615, + "step": 41 + }, + { + "epoch": 0.01, + "learning_rate": 5.999983882524992e-06, + "loss": 1.0649, + "step": 42 + }, + { + "epoch": 0.01, + "learning_rate": 5.999980497858907e-06, + "loss": 1.3919, + "step": 43 + }, + { + "epoch": 0.01, + "learning_rate": 5.999976790845131e-06, + "loss": 1.4423, + "step": 44 + }, + { + "epoch": 0.01, + "learning_rate": 5.999972761484064e-06, + "loss": 1.1149, + "step": 45 + }, + { + "epoch": 0.01, + "learning_rate": 5.999968409776138e-06, + "loss": 1.2653, + "step": 46 + }, + { + "epoch": 0.01, + "learning_rate": 5.99996373572182e-06, + "loss": 1.1168, + "step": 47 + }, + { + "epoch": 0.01, + "learning_rate": 5.999958739321613e-06, + "loss": 1.2264, + "step": 48 + }, + { + "epoch": 0.01, + "learning_rate": 5.999953420576053e-06, + "loss": 1.2457, + "step": 49 + }, + { + "epoch": 0.01, + "learning_rate": 5.999947779485713e-06, + "loss": 0.9746, + "step": 50 + }, + { + "epoch": 0.01, + "learning_rate": 5.999941816051197e-06, + "loss": 1.1715, + "step": 51 + }, + { + "epoch": 0.01, + "learning_rate": 5.999935530273147e-06, + "loss": 1.2051, + "step": 52 + }, + { + "epoch": 0.01, + "learning_rate": 5.999928922152239e-06, + "loss": 1.2213, + "step": 53 + }, + { + "epoch": 0.01, + "learning_rate": 5.999921991689182e-06, + "loss": 1.2407, + "step": 54 + }, + { + "epoch": 0.01, + "learning_rate": 5.9999147388847215e-06, + "loss": 1.2428, + "step": 55 + }, + { + "epoch": 0.01, + "learning_rate": 5.999907163739636e-06, + "loss": 0.8133, + "step": 56 + }, + { + "epoch": 0.01, + "learning_rate": 5.9998992662547405e-06, + "loss": 0.9955, + "step": 57 + }, + { + "epoch": 0.01, + "learning_rate": 5.999891046430881e-06, + "loss": 1.2974, + "step": 58 + }, + { + "epoch": 0.01, + "learning_rate": 5.999882504268945e-06, + "loss": 0.8606, + "step": 59 + }, + { + "epoch": 0.01, + "learning_rate": 5.999873639769846e-06, + "loss": 0.9606, + "step": 60 + }, + { + "epoch": 0.01, + "learning_rate": 5.99986445293454e-06, + "loss": 0.9696, + "step": 61 + }, + { + "epoch": 0.01, + "learning_rate": 5.999854943764012e-06, + "loss": 1.5109, + "step": 62 + }, + { + "epoch": 0.01, + "learning_rate": 5.999845112259284e-06, + "loss": 1.1779, + "step": 63 + }, + { + "epoch": 0.01, + "learning_rate": 5.999834958421414e-06, + "loss": 0.9999, + "step": 64 + }, + { + "epoch": 0.01, + "learning_rate": 5.99982448225149e-06, + "loss": 1.1015, + "step": 65 + }, + { + "epoch": 0.01, + "learning_rate": 5.999813683750641e-06, + "loss": 0.911, + "step": 66 + }, + { + "epoch": 0.01, + "learning_rate": 5.999802562920025e-06, + "loss": 1.2481, + "step": 67 + }, + { + "epoch": 0.01, + "learning_rate": 5.999791119760837e-06, + "loss": 1.0154, + "step": 68 + }, + { + "epoch": 0.01, + "learning_rate": 5.999779354274308e-06, + "loss": 1.2935, + "step": 69 + }, + { + "epoch": 0.01, + "learning_rate": 5.999767266461702e-06, + "loss": 0.9441, + "step": 70 + }, + { + "epoch": 0.01, + "learning_rate": 5.9997548563243165e-06, + "loss": 1.1343, + "step": 71 + }, + { + "epoch": 0.01, + "learning_rate": 5.9997421238634865e-06, + "loss": 1.1293, + "step": 72 + }, + { + "epoch": 0.02, + "learning_rate": 5.999729069080578e-06, + "loss": 1.2322, + "step": 73 + }, + { + "epoch": 0.02, + "learning_rate": 5.999715691976996e-06, + "loss": 1.1863, + "step": 74 + }, + { + "epoch": 0.02, + "learning_rate": 5.999701992554177e-06, + "loss": 1.3767, + "step": 75 + }, + { + "epoch": 0.02, + "learning_rate": 5.999687970813593e-06, + "loss": 1.085, + "step": 76 + }, + { + "epoch": 0.02, + "learning_rate": 5.99967362675675e-06, + "loss": 1.3077, + "step": 77 + }, + { + "epoch": 0.02, + "learning_rate": 5.9996589603851905e-06, + "loss": 1.1452, + "step": 78 + }, + { + "epoch": 0.02, + "learning_rate": 5.999643971700489e-06, + "loss": 1.0257, + "step": 79 + }, + { + "epoch": 0.02, + "learning_rate": 5.999628660704258e-06, + "loss": 1.1751, + "step": 80 + }, + { + "epoch": 0.02, + "learning_rate": 5.99961302739814e-06, + "loss": 0.9148, + "step": 81 + }, + { + "epoch": 0.02, + "learning_rate": 5.999597071783817e-06, + "loss": 0.9614, + "step": 82 + }, + { + "epoch": 0.02, + "learning_rate": 5.999580793863003e-06, + "loss": 1.1894, + "step": 83 + }, + { + "epoch": 0.02, + "learning_rate": 5.999564193637447e-06, + "loss": 1.241, + "step": 84 + }, + { + "epoch": 0.02, + "learning_rate": 5.999547271108932e-06, + "loss": 1.0488, + "step": 85 + }, + { + "epoch": 0.02, + "learning_rate": 5.999530026279275e-06, + "loss": 0.9997, + "step": 86 + }, + { + "epoch": 0.02, + "learning_rate": 5.999512459150333e-06, + "loss": 1.1317, + "step": 87 + }, + { + "epoch": 0.02, + "learning_rate": 5.99949456972399e-06, + "loss": 1.1307, + "step": 88 + }, + { + "epoch": 0.02, + "learning_rate": 5.999476358002171e-06, + "loss": 1.0631, + "step": 89 + }, + { + "epoch": 0.02, + "learning_rate": 5.9994578239868306e-06, + "loss": 1.0541, + "step": 90 + }, + { + "epoch": 0.02, + "learning_rate": 5.999438967679961e-06, + "loss": 1.1123, + "step": 91 + }, + { + "epoch": 0.02, + "learning_rate": 5.999419789083588e-06, + "loss": 1.1307, + "step": 92 + }, + { + "epoch": 0.02, + "learning_rate": 5.9994002881997734e-06, + "loss": 1.2111, + "step": 93 + }, + { + "epoch": 0.02, + "learning_rate": 5.999380465030611e-06, + "loss": 1.2605, + "step": 94 + }, + { + "epoch": 0.02, + "learning_rate": 5.999360319578232e-06, + "loss": 0.9863, + "step": 95 + }, + { + "epoch": 0.02, + "learning_rate": 5.999339851844801e-06, + "loss": 1.0087, + "step": 96 + }, + { + "epoch": 0.02, + "learning_rate": 5.999319061832517e-06, + "loss": 1.2027, + "step": 97 + }, + { + "epoch": 0.02, + "learning_rate": 5.9992979495436134e-06, + "loss": 1.1981, + "step": 98 + }, + { + "epoch": 0.02, + "learning_rate": 5.99927651498036e-06, + "loss": 0.8458, + "step": 99 + }, + { + "epoch": 0.02, + "learning_rate": 5.9992547581450585e-06, + "loss": 0.9559, + "step": 100 + }, + { + "epoch": 0.02, + "learning_rate": 5.999232679040047e-06, + "loss": 1.3397, + "step": 101 + }, + { + "epoch": 0.02, + "learning_rate": 5.999210277667698e-06, + "loss": 1.2488, + "step": 102 + }, + { + "epoch": 0.02, + "learning_rate": 5.99918755403042e-06, + "loss": 0.9681, + "step": 103 + }, + { + "epoch": 0.02, + "learning_rate": 5.999164508130651e-06, + "loss": 1.0477, + "step": 104 + }, + { + "epoch": 0.02, + "learning_rate": 5.999141139970872e-06, + "loss": 1.0544, + "step": 105 + }, + { + "epoch": 0.02, + "learning_rate": 5.999117449553591e-06, + "loss": 0.94, + "step": 106 + }, + { + "epoch": 0.02, + "learning_rate": 5.999093436881353e-06, + "loss": 0.9462, + "step": 107 + }, + { + "epoch": 0.02, + "learning_rate": 5.999069101956739e-06, + "loss": 1.0561, + "step": 108 + }, + { + "epoch": 0.02, + "learning_rate": 5.999044444782366e-06, + "loss": 1.1276, + "step": 109 + }, + { + "epoch": 0.02, + "learning_rate": 5.99901946536088e-06, + "loss": 0.9206, + "step": 110 + }, + { + "epoch": 0.02, + "learning_rate": 5.998994163694967e-06, + "loss": 0.939, + "step": 111 + }, + { + "epoch": 0.02, + "learning_rate": 5.998968539787344e-06, + "loss": 1.1145, + "step": 112 + }, + { + "epoch": 0.02, + "learning_rate": 5.998942593640767e-06, + "loss": 1.0664, + "step": 113 + }, + { + "epoch": 0.02, + "learning_rate": 5.998916325258021e-06, + "loss": 0.9519, + "step": 114 + }, + { + "epoch": 0.02, + "learning_rate": 5.99888973464193e-06, + "loss": 1.0119, + "step": 115 + }, + { + "epoch": 0.02, + "learning_rate": 5.998862821795352e-06, + "loss": 0.981, + "step": 116 + }, + { + "epoch": 0.02, + "learning_rate": 5.998835586721177e-06, + "loss": 1.1971, + "step": 117 + }, + { + "epoch": 0.02, + "learning_rate": 5.998808029422333e-06, + "loss": 1.0213, + "step": 118 + }, + { + "epoch": 0.02, + "learning_rate": 5.998780149901779e-06, + "loss": 1.1281, + "step": 119 + }, + { + "epoch": 0.02, + "learning_rate": 5.998751948162512e-06, + "loss": 0.8975, + "step": 120 + }, + { + "epoch": 0.03, + "learning_rate": 5.998723424207563e-06, + "loss": 1.0096, + "step": 121 + }, + { + "epoch": 0.03, + "learning_rate": 5.998694578039996e-06, + "loss": 1.2597, + "step": 122 + }, + { + "epoch": 0.03, + "learning_rate": 5.99866540966291e-06, + "loss": 1.1371, + "step": 123 + }, + { + "epoch": 0.03, + "learning_rate": 5.998635919079441e-06, + "loss": 1.1777, + "step": 124 + }, + { + "epoch": 0.03, + "learning_rate": 5.9986061062927545e-06, + "loss": 0.9861, + "step": 125 + }, + { + "epoch": 0.03, + "learning_rate": 5.9985759713060565e-06, + "loss": 1.1509, + "step": 126 + }, + { + "epoch": 0.03, + "learning_rate": 5.998545514122585e-06, + "loss": 1.0427, + "step": 127 + }, + { + "epoch": 0.03, + "learning_rate": 5.9985147347456115e-06, + "loss": 1.0939, + "step": 128 + }, + { + "epoch": 0.03, + "learning_rate": 5.998483633178444e-06, + "loss": 1.0729, + "step": 129 + }, + { + "epoch": 0.03, + "learning_rate": 5.9984522094244245e-06, + "loss": 1.0709, + "step": 130 + }, + { + "epoch": 0.03, + "learning_rate": 5.998420463486928e-06, + "loss": 1.103, + "step": 131 + }, + { + "epoch": 0.03, + "learning_rate": 5.998388395369367e-06, + "loss": 1.0118, + "step": 132 + }, + { + "epoch": 0.03, + "learning_rate": 5.998356005075187e-06, + "loss": 1.1575, + "step": 133 + }, + { + "epoch": 0.03, + "learning_rate": 5.998323292607869e-06, + "loss": 1.0233, + "step": 134 + }, + { + "epoch": 0.03, + "learning_rate": 5.9982902579709254e-06, + "loss": 0.8519, + "step": 135 + }, + { + "epoch": 0.03, + "learning_rate": 5.998256901167909e-06, + "loss": 0.9166, + "step": 136 + }, + { + "epoch": 0.03, + "learning_rate": 5.998223222202402e-06, + "loss": 1.1034, + "step": 137 + }, + { + "epoch": 0.03, + "learning_rate": 5.998189221078024e-06, + "loss": 0.9666, + "step": 138 + }, + { + "epoch": 0.03, + "learning_rate": 5.998154897798428e-06, + "loss": 1.165, + "step": 139 + }, + { + "epoch": 0.03, + "learning_rate": 5.998120252367302e-06, + "loss": 1.2268, + "step": 140 + }, + { + "epoch": 0.03, + "learning_rate": 5.998085284788369e-06, + "loss": 0.9355, + "step": 141 + }, + { + "epoch": 0.03, + "learning_rate": 5.998049995065386e-06, + "loss": 1.1064, + "step": 142 + }, + { + "epoch": 0.03, + "learning_rate": 5.998014383202145e-06, + "loss": 1.1342, + "step": 143 + }, + { + "epoch": 0.03, + "learning_rate": 5.997978449202472e-06, + "loss": 0.9912, + "step": 144 + }, + { + "epoch": 0.03, + "learning_rate": 5.997942193070229e-06, + "loss": 0.998, + "step": 145 + }, + { + "epoch": 0.03, + "learning_rate": 5.997905614809312e-06, + "loss": 1.0592, + "step": 146 + }, + { + "epoch": 0.03, + "learning_rate": 5.99786871442365e-06, + "loss": 0.8856, + "step": 147 + }, + { + "epoch": 0.03, + "learning_rate": 5.9978314919172075e-06, + "loss": 1.0244, + "step": 148 + }, + { + "epoch": 0.03, + "learning_rate": 5.997793947293987e-06, + "loss": 1.1642, + "step": 149 + }, + { + "epoch": 0.03, + "learning_rate": 5.99775608055802e-06, + "loss": 0.9795, + "step": 150 + }, + { + "epoch": 0.03, + "learning_rate": 5.997717891713375e-06, + "loss": 0.9266, + "step": 151 + }, + { + "epoch": 0.03, + "learning_rate": 5.997679380764158e-06, + "loss": 1.1468, + "step": 152 + }, + { + "epoch": 0.03, + "learning_rate": 5.997640547714504e-06, + "loss": 0.9199, + "step": 153 + }, + { + "epoch": 0.03, + "learning_rate": 5.997601392568589e-06, + "loss": 1.0031, + "step": 154 + }, + { + "epoch": 0.03, + "learning_rate": 5.997561915330617e-06, + "loss": 1.0035, + "step": 155 + }, + { + "epoch": 0.03, + "learning_rate": 5.9975221160048306e-06, + "loss": 0.9766, + "step": 156 + }, + { + "epoch": 0.03, + "learning_rate": 5.997481994595507e-06, + "loss": 1.1334, + "step": 157 + }, + { + "epoch": 0.03, + "learning_rate": 5.997441551106958e-06, + "loss": 1.2685, + "step": 158 + }, + { + "epoch": 0.03, + "learning_rate": 5.997400785543528e-06, + "loss": 1.0696, + "step": 159 + }, + { + "epoch": 0.03, + "learning_rate": 5.997359697909597e-06, + "loss": 0.9403, + "step": 160 + }, + { + "epoch": 0.03, + "learning_rate": 5.99731828820958e-06, + "loss": 0.9769, + "step": 161 + }, + { + "epoch": 0.03, + "learning_rate": 5.9972765564479265e-06, + "loss": 0.8709, + "step": 162 + }, + { + "epoch": 0.03, + "learning_rate": 5.997234502629122e-06, + "loss": 1.0661, + "step": 163 + }, + { + "epoch": 0.03, + "learning_rate": 5.997192126757684e-06, + "loss": 1.0048, + "step": 164 + }, + { + "epoch": 0.03, + "learning_rate": 5.997149428838165e-06, + "loss": 1.1654, + "step": 165 + }, + { + "epoch": 0.03, + "learning_rate": 5.9971064088751545e-06, + "loss": 1.0636, + "step": 166 + }, + { + "epoch": 0.03, + "learning_rate": 5.997063066873274e-06, + "loss": 1.0468, + "step": 167 + }, + { + "epoch": 0.03, + "learning_rate": 5.997019402837181e-06, + "loss": 1.071, + "step": 168 + }, + { + "epoch": 0.04, + "learning_rate": 5.996975416771567e-06, + "loss": 0.9035, + "step": 169 + }, + { + "epoch": 0.04, + "learning_rate": 5.996931108681158e-06, + "loss": 0.9098, + "step": 170 + }, + { + "epoch": 0.04, + "learning_rate": 5.996886478570716e-06, + "loss": 0.9487, + "step": 171 + }, + { + "epoch": 0.04, + "learning_rate": 5.996841526445035e-06, + "loss": 0.8831, + "step": 172 + }, + { + "epoch": 0.04, + "learning_rate": 5.996796252308945e-06, + "loss": 0.9892, + "step": 173 + }, + { + "epoch": 0.04, + "learning_rate": 5.996750656167313e-06, + "loss": 1.0303, + "step": 174 + }, + { + "epoch": 0.04, + "learning_rate": 5.996704738025036e-06, + "loss": 1.1696, + "step": 175 + }, + { + "epoch": 0.04, + "learning_rate": 5.9966584978870496e-06, + "loss": 1.145, + "step": 176 + }, + { + "epoch": 0.04, + "learning_rate": 5.996611935758321e-06, + "loss": 1.0817, + "step": 177 + }, + { + "epoch": 0.04, + "learning_rate": 5.996565051643854e-06, + "loss": 0.9447, + "step": 178 + }, + { + "epoch": 0.04, + "learning_rate": 5.9965178455486854e-06, + "loss": 0.9365, + "step": 179 + }, + { + "epoch": 0.04, + "learning_rate": 5.996470317477888e-06, + "loss": 0.9432, + "step": 180 + }, + { + "epoch": 0.04, + "learning_rate": 5.996422467436569e-06, + "loss": 1.0671, + "step": 181 + }, + { + "epoch": 0.04, + "learning_rate": 5.99637429542987e-06, + "loss": 1.1596, + "step": 182 + }, + { + "epoch": 0.04, + "learning_rate": 5.9963258014629674e-06, + "loss": 1.0035, + "step": 183 + }, + { + "epoch": 0.04, + "learning_rate": 5.996276985541071e-06, + "loss": 0.9413, + "step": 184 + }, + { + "epoch": 0.04, + "learning_rate": 5.9962278476694256e-06, + "loss": 1.0505, + "step": 185 + }, + { + "epoch": 0.04, + "learning_rate": 5.996178387853312e-06, + "loss": 1.0839, + "step": 186 + }, + { + "epoch": 0.04, + "learning_rate": 5.996128606098044e-06, + "loss": 0.9701, + "step": 187 + }, + { + "epoch": 0.04, + "learning_rate": 5.996078502408972e-06, + "loss": 1.0174, + "step": 188 + }, + { + "epoch": 0.04, + "learning_rate": 5.996028076791477e-06, + "loss": 0.9293, + "step": 189 + }, + { + "epoch": 0.04, + "learning_rate": 5.995977329250981e-06, + "loss": 1.0603, + "step": 190 + }, + { + "epoch": 0.04, + "learning_rate": 5.995926259792934e-06, + "loss": 1.2609, + "step": 191 + }, + { + "epoch": 0.04, + "learning_rate": 5.995874868422825e-06, + "loss": 1.027, + "step": 192 + }, + { + "epoch": 0.04, + "learning_rate": 5.995823155146174e-06, + "loss": 1.2603, + "step": 193 + }, + { + "epoch": 0.04, + "learning_rate": 5.995771119968538e-06, + "loss": 0.9458, + "step": 194 + }, + { + "epoch": 0.04, + "learning_rate": 5.9957187628955105e-06, + "loss": 0.9053, + "step": 195 + }, + { + "epoch": 0.04, + "learning_rate": 5.995666083932715e-06, + "loss": 1.0194, + "step": 196 + }, + { + "epoch": 0.04, + "learning_rate": 5.995613083085813e-06, + "loss": 1.1203, + "step": 197 + }, + { + "epoch": 0.04, + "learning_rate": 5.9955597603604985e-06, + "loss": 1.1192, + "step": 198 + }, + { + "epoch": 0.04, + "learning_rate": 5.9955061157625015e-06, + "loss": 1.2204, + "step": 199 + }, + { + "epoch": 0.04, + "learning_rate": 5.995452149297587e-06, + "loss": 0.9118, + "step": 200 + }, + { + "epoch": 0.04, + "learning_rate": 5.9953978609715515e-06, + "loss": 1.412, + "step": 201 + }, + { + "epoch": 0.04, + "learning_rate": 5.995343250790231e-06, + "loss": 0.9113, + "step": 202 + }, + { + "epoch": 0.04, + "learning_rate": 5.995288318759491e-06, + "loss": 0.9874, + "step": 203 + }, + { + "epoch": 0.04, + "learning_rate": 5.9952330648852344e-06, + "loss": 0.9065, + "step": 204 + }, + { + "epoch": 0.04, + "learning_rate": 5.9951774891734e-06, + "loss": 1.2576, + "step": 205 + }, + { + "epoch": 0.04, + "learning_rate": 5.9951215916299575e-06, + "loss": 1.0187, + "step": 206 + }, + { + "epoch": 0.04, + "learning_rate": 5.995065372260913e-06, + "loss": 0.8289, + "step": 207 + }, + { + "epoch": 0.04, + "learning_rate": 5.995008831072308e-06, + "loss": 0.9299, + "step": 208 + }, + { + "epoch": 0.04, + "learning_rate": 5.9949519680702195e-06, + "loss": 0.8268, + "step": 209 + }, + { + "epoch": 0.04, + "learning_rate": 5.994894783260755e-06, + "loss": 0.8399, + "step": 210 + }, + { + "epoch": 0.04, + "learning_rate": 5.9948372766500595e-06, + "loss": 0.977, + "step": 211 + }, + { + "epoch": 0.04, + "learning_rate": 5.994779448244313e-06, + "loss": 1.0439, + "step": 212 + }, + { + "epoch": 0.04, + "learning_rate": 5.994721298049727e-06, + "loss": 1.0908, + "step": 213 + }, + { + "epoch": 0.04, + "learning_rate": 5.994662826072552e-06, + "loss": 1.0506, + "step": 214 + }, + { + "epoch": 0.04, + "learning_rate": 5.99460403231907e-06, + "loss": 0.7117, + "step": 215 + }, + { + "epoch": 0.04, + "learning_rate": 5.994544916795599e-06, + "loss": 0.9917, + "step": 216 + }, + { + "epoch": 0.05, + "learning_rate": 5.9944854795084894e-06, + "loss": 1.1558, + "step": 217 + }, + { + "epoch": 0.05, + "learning_rate": 5.994425720464129e-06, + "loss": 1.0204, + "step": 218 + }, + { + "epoch": 0.05, + "learning_rate": 5.994365639668939e-06, + "loss": 1.0093, + "step": 219 + }, + { + "epoch": 0.05, + "learning_rate": 5.994305237129374e-06, + "loss": 1.1288, + "step": 220 + }, + { + "epoch": 0.05, + "learning_rate": 5.994244512851926e-06, + "loss": 0.8111, + "step": 221 + }, + { + "epoch": 0.05, + "learning_rate": 5.994183466843119e-06, + "loss": 0.7688, + "step": 222 + }, + { + "epoch": 0.05, + "learning_rate": 5.99412209910951e-06, + "loss": 1.0126, + "step": 223 + }, + { + "epoch": 0.05, + "learning_rate": 5.994060409657697e-06, + "loss": 0.9432, + "step": 224 + }, + { + "epoch": 0.05, + "learning_rate": 5.9939983984943064e-06, + "loss": 0.8748, + "step": 225 + }, + { + "epoch": 0.05, + "learning_rate": 5.993936065626001e-06, + "loss": 1.1009, + "step": 226 + }, + { + "epoch": 0.05, + "learning_rate": 5.993873411059479e-06, + "loss": 1.335, + "step": 227 + }, + { + "epoch": 0.05, + "learning_rate": 5.993810434801473e-06, + "loss": 0.9907, + "step": 228 + }, + { + "epoch": 0.05, + "learning_rate": 5.993747136858748e-06, + "loss": 1.145, + "step": 229 + }, + { + "epoch": 0.05, + "learning_rate": 5.993683517238109e-06, + "loss": 0.8521, + "step": 230 + }, + { + "epoch": 0.05, + "learning_rate": 5.993619575946389e-06, + "loss": 1.1498, + "step": 231 + }, + { + "epoch": 0.05, + "learning_rate": 5.993555312990459e-06, + "loss": 0.9589, + "step": 232 + }, + { + "epoch": 0.05, + "learning_rate": 5.993490728377223e-06, + "loss": 0.937, + "step": 233 + }, + { + "epoch": 0.05, + "learning_rate": 5.9934258221136235e-06, + "loss": 1.0393, + "step": 234 + }, + { + "epoch": 0.05, + "learning_rate": 5.993360594206633e-06, + "loss": 0.8115, + "step": 235 + }, + { + "epoch": 0.05, + "learning_rate": 5.993295044663259e-06, + "loss": 1.1074, + "step": 236 + }, + { + "epoch": 0.05, + "learning_rate": 5.993229173490547e-06, + "loss": 0.9184, + "step": 237 + }, + { + "epoch": 0.05, + "learning_rate": 5.993162980695574e-06, + "loss": 1.2034, + "step": 238 + }, + { + "epoch": 0.05, + "learning_rate": 5.993096466285451e-06, + "loss": 1.1001, + "step": 239 + }, + { + "epoch": 0.05, + "learning_rate": 5.993029630267327e-06, + "loss": 0.8281, + "step": 240 + }, + { + "epoch": 0.05, + "learning_rate": 5.992962472648384e-06, + "loss": 1.1165, + "step": 241 + }, + { + "epoch": 0.05, + "learning_rate": 5.9928949934358355e-06, + "loss": 0.9084, + "step": 242 + }, + { + "epoch": 0.05, + "learning_rate": 5.992827192636934e-06, + "loss": 0.8897, + "step": 243 + }, + { + "epoch": 0.05, + "learning_rate": 5.992759070258965e-06, + "loss": 1.0191, + "step": 244 + }, + { + "epoch": 0.05, + "learning_rate": 5.992690626309246e-06, + "loss": 1.1065, + "step": 245 + }, + { + "epoch": 0.05, + "learning_rate": 5.992621860795134e-06, + "loss": 0.9703, + "step": 246 + }, + { + "epoch": 0.05, + "learning_rate": 5.9925527737240155e-06, + "loss": 1.307, + "step": 247 + }, + { + "epoch": 0.05, + "learning_rate": 5.9924833651033155e-06, + "loss": 0.8289, + "step": 248 + }, + { + "epoch": 0.05, + "learning_rate": 5.992413634940492e-06, + "loss": 0.7635, + "step": 249 + }, + { + "epoch": 0.05, + "learning_rate": 5.992343583243036e-06, + "loss": 1.096, + "step": 250 + }, + { + "epoch": 0.05, + "learning_rate": 5.992273210018476e-06, + "loss": 0.9323, + "step": 251 + }, + { + "epoch": 0.05, + "learning_rate": 5.9922025152743725e-06, + "loss": 0.8956, + "step": 252 + }, + { + "epoch": 0.05, + "learning_rate": 5.992131499018324e-06, + "loss": 1.0958, + "step": 253 + }, + { + "epoch": 0.05, + "learning_rate": 5.9920601612579575e-06, + "loss": 1.0515, + "step": 254 + }, + { + "epoch": 0.05, + "learning_rate": 5.991988502000941e-06, + "loss": 1.1646, + "step": 255 + }, + { + "epoch": 0.05, + "learning_rate": 5.991916521254974e-06, + "loss": 0.8565, + "step": 256 + }, + { + "epoch": 0.05, + "learning_rate": 5.991844219027789e-06, + "loss": 1.2754, + "step": 257 + }, + { + "epoch": 0.05, + "learning_rate": 5.991771595327157e-06, + "loss": 0.9573, + "step": 258 + }, + { + "epoch": 0.05, + "learning_rate": 5.99169865016088e-06, + "loss": 0.9665, + "step": 259 + }, + { + "epoch": 0.05, + "learning_rate": 5.991625383536798e-06, + "loss": 0.8779, + "step": 260 + }, + { + "epoch": 0.05, + "learning_rate": 5.991551795462781e-06, + "loss": 0.9983, + "step": 261 + }, + { + "epoch": 0.05, + "learning_rate": 5.991477885946736e-06, + "loss": 0.9575, + "step": 262 + }, + { + "epoch": 0.05, + "learning_rate": 5.991403654996606e-06, + "loss": 0.9767, + "step": 263 + }, + { + "epoch": 0.05, + "learning_rate": 5.991329102620368e-06, + "loss": 0.9856, + "step": 264 + }, + { + "epoch": 0.06, + "learning_rate": 5.991254228826031e-06, + "loss": 1.097, + "step": 265 + }, + { + "epoch": 0.06, + "learning_rate": 5.99117903362164e-06, + "loss": 0.9823, + "step": 266 + }, + { + "epoch": 0.06, + "learning_rate": 5.9911035170152755e-06, + "loss": 1.1262, + "step": 267 + }, + { + "epoch": 0.06, + "learning_rate": 5.991027679015052e-06, + "loss": 1.0386, + "step": 268 + }, + { + "epoch": 0.06, + "learning_rate": 5.9909515196291175e-06, + "loss": 0.7662, + "step": 269 + }, + { + "epoch": 0.06, + "learning_rate": 5.990875038865656e-06, + "loss": 0.977, + "step": 270 + }, + { + "epoch": 0.06, + "learning_rate": 5.990798236732884e-06, + "loss": 1.2878, + "step": 271 + }, + { + "epoch": 0.06, + "learning_rate": 5.990721113239055e-06, + "loss": 0.8911, + "step": 272 + }, + { + "epoch": 0.06, + "learning_rate": 5.990643668392457e-06, + "loss": 1.0702, + "step": 273 + }, + { + "epoch": 0.06, + "learning_rate": 5.990565902201409e-06, + "loss": 1.0518, + "step": 274 + }, + { + "epoch": 0.06, + "learning_rate": 5.990487814674268e-06, + "loss": 0.8105, + "step": 275 + }, + { + "epoch": 0.06, + "learning_rate": 5.990409405819426e-06, + "loss": 1.0531, + "step": 276 + }, + { + "epoch": 0.06, + "learning_rate": 5.990330675645307e-06, + "loss": 1.0297, + "step": 277 + }, + { + "epoch": 0.06, + "learning_rate": 5.9902516241603695e-06, + "loss": 0.9146, + "step": 278 + }, + { + "epoch": 0.06, + "learning_rate": 5.990172251373108e-06, + "loss": 1.0208, + "step": 279 + }, + { + "epoch": 0.06, + "learning_rate": 5.990092557292052e-06, + "loss": 0.9104, + "step": 280 + }, + { + "epoch": 0.06, + "learning_rate": 5.990012541925763e-06, + "loss": 1.019, + "step": 281 + }, + { + "epoch": 0.06, + "learning_rate": 5.989932205282841e-06, + "loss": 1.0193, + "step": 282 + }, + { + "epoch": 0.06, + "learning_rate": 5.989851547371916e-06, + "loss": 1.1017, + "step": 283 + }, + { + "epoch": 0.06, + "learning_rate": 5.989770568201656e-06, + "loss": 0.8855, + "step": 284 + }, + { + "epoch": 0.06, + "learning_rate": 5.989689267780762e-06, + "loss": 1.3386, + "step": 285 + }, + { + "epoch": 0.06, + "learning_rate": 5.989607646117969e-06, + "loss": 0.7632, + "step": 286 + }, + { + "epoch": 0.06, + "learning_rate": 5.989525703222049e-06, + "loss": 0.9788, + "step": 287 + }, + { + "epoch": 0.06, + "learning_rate": 5.989443439101805e-06, + "loss": 0.9542, + "step": 288 + }, + { + "epoch": 0.06, + "learning_rate": 5.989360853766076e-06, + "loss": 1.0081, + "step": 289 + }, + { + "epoch": 0.06, + "learning_rate": 5.989277947223737e-06, + "loss": 1.0064, + "step": 290 + }, + { + "epoch": 0.06, + "learning_rate": 5.989194719483696e-06, + "loss": 1.3006, + "step": 291 + }, + { + "epoch": 0.06, + "learning_rate": 5.989111170554895e-06, + "loss": 0.9354, + "step": 292 + }, + { + "epoch": 0.06, + "learning_rate": 5.989027300446313e-06, + "loss": 1.0887, + "step": 293 + }, + { + "epoch": 0.06, + "learning_rate": 5.988943109166961e-06, + "loss": 1.2155, + "step": 294 + }, + { + "epoch": 0.06, + "learning_rate": 5.988858596725884e-06, + "loss": 1.0584, + "step": 295 + }, + { + "epoch": 0.06, + "learning_rate": 5.988773763132165e-06, + "loss": 1.1149, + "step": 296 + }, + { + "epoch": 0.06, + "learning_rate": 5.988688608394918e-06, + "loss": 1.0847, + "step": 297 + }, + { + "epoch": 0.06, + "learning_rate": 5.988603132523294e-06, + "loss": 0.9848, + "step": 298 + }, + { + "epoch": 0.06, + "learning_rate": 5.988517335526476e-06, + "loss": 0.9482, + "step": 299 + }, + { + "epoch": 0.06, + "learning_rate": 5.9884312174136835e-06, + "loss": 1.031, + "step": 300 + }, + { + "epoch": 0.06, + "learning_rate": 5.988344778194171e-06, + "loss": 1.134, + "step": 301 + }, + { + "epoch": 0.06, + "learning_rate": 5.988258017877224e-06, + "loss": 1.0821, + "step": 302 + }, + { + "epoch": 0.06, + "learning_rate": 5.988170936472167e-06, + "loss": 0.8503, + "step": 303 + }, + { + "epoch": 0.06, + "learning_rate": 5.988083533988355e-06, + "loss": 1.1176, + "step": 304 + }, + { + "epoch": 0.06, + "learning_rate": 5.987995810435181e-06, + "loss": 1.185, + "step": 305 + }, + { + "epoch": 0.06, + "learning_rate": 5.987907765822072e-06, + "loss": 1.1113, + "step": 306 + }, + { + "epoch": 0.06, + "learning_rate": 5.987819400158485e-06, + "loss": 1.1201, + "step": 307 + }, + { + "epoch": 0.06, + "learning_rate": 5.987730713453917e-06, + "loss": 1.0803, + "step": 308 + }, + { + "epoch": 0.06, + "learning_rate": 5.987641705717897e-06, + "loss": 1.0283, + "step": 309 + }, + { + "epoch": 0.06, + "learning_rate": 5.987552376959988e-06, + "loss": 0.9846, + "step": 310 + }, + { + "epoch": 0.06, + "learning_rate": 5.987462727189791e-06, + "loss": 0.8748, + "step": 311 + }, + { + "epoch": 0.06, + "learning_rate": 5.987372756416937e-06, + "loss": 1.15, + "step": 312 + }, + { + "epoch": 0.07, + "learning_rate": 5.987282464651092e-06, + "loss": 1.0994, + "step": 313 + }, + { + "epoch": 0.07, + "learning_rate": 5.9871918519019616e-06, + "loss": 0.8744, + "step": 314 + }, + { + "epoch": 0.07, + "learning_rate": 5.987100918179278e-06, + "loss": 1.1972, + "step": 315 + }, + { + "epoch": 0.07, + "learning_rate": 5.987009663492814e-06, + "loss": 1.0936, + "step": 316 + }, + { + "epoch": 0.07, + "learning_rate": 5.9869180878523755e-06, + "loss": 0.9805, + "step": 317 + }, + { + "epoch": 0.07, + "learning_rate": 5.986826191267801e-06, + "loss": 0.9992, + "step": 318 + }, + { + "epoch": 0.07, + "learning_rate": 5.986733973748966e-06, + "loss": 0.843, + "step": 319 + }, + { + "epoch": 0.07, + "learning_rate": 5.986641435305779e-06, + "loss": 0.9656, + "step": 320 + }, + { + "epoch": 0.07, + "learning_rate": 5.986548575948182e-06, + "loss": 0.9131, + "step": 321 + }, + { + "epoch": 0.07, + "learning_rate": 5.9864553956861546e-06, + "loss": 1.287, + "step": 322 + }, + { + "epoch": 0.07, + "learning_rate": 5.986361894529707e-06, + "loss": 0.8363, + "step": 323 + }, + { + "epoch": 0.07, + "learning_rate": 5.986268072488887e-06, + "loss": 1.0932, + "step": 324 + }, + { + "epoch": 0.07, + "learning_rate": 5.986173929573777e-06, + "loss": 0.7928, + "step": 325 + }, + { + "epoch": 0.07, + "learning_rate": 5.9860794657944895e-06, + "loss": 1.1357, + "step": 326 + }, + { + "epoch": 0.07, + "learning_rate": 5.985984681161177e-06, + "loss": 1.0201, + "step": 327 + }, + { + "epoch": 0.07, + "learning_rate": 5.9858895756840246e-06, + "loss": 1.2461, + "step": 328 + }, + { + "epoch": 0.07, + "learning_rate": 5.9857941493732495e-06, + "loss": 0.9724, + "step": 329 + }, + { + "epoch": 0.07, + "learning_rate": 5.9856984022391064e-06, + "loss": 0.9171, + "step": 330 + }, + { + "epoch": 0.07, + "learning_rate": 5.985602334291883e-06, + "loss": 0.9148, + "step": 331 + }, + { + "epoch": 0.07, + "learning_rate": 5.985505945541902e-06, + "loss": 0.9829, + "step": 332 + }, + { + "epoch": 0.07, + "learning_rate": 5.98540923599952e-06, + "loss": 1.0627, + "step": 333 + }, + { + "epoch": 0.07, + "learning_rate": 5.985312205675129e-06, + "loss": 1.0944, + "step": 334 + }, + { + "epoch": 0.07, + "learning_rate": 5.9852148545791535e-06, + "loss": 0.9828, + "step": 335 + }, + { + "epoch": 0.07, + "learning_rate": 5.985117182722055e-06, + "loss": 0.8804, + "step": 336 + }, + { + "epoch": 0.07, + "learning_rate": 5.98501919011433e-06, + "loss": 1.1282, + "step": 337 + }, + { + "epoch": 0.07, + "learning_rate": 5.984920876766505e-06, + "loss": 1.2349, + "step": 338 + }, + { + "epoch": 0.07, + "learning_rate": 5.984822242689144e-06, + "loss": 1.044, + "step": 339 + }, + { + "epoch": 0.07, + "learning_rate": 5.984723287892846e-06, + "loss": 1.3231, + "step": 340 + }, + { + "epoch": 0.07, + "learning_rate": 5.984624012388245e-06, + "loss": 0.9244, + "step": 341 + }, + { + "epoch": 0.07, + "learning_rate": 5.984524416186006e-06, + "loss": 0.8762, + "step": 342 + }, + { + "epoch": 0.07, + "learning_rate": 5.9844244992968315e-06, + "loss": 0.7774, + "step": 343 + }, + { + "epoch": 0.07, + "learning_rate": 5.984324261731458e-06, + "loss": 1.0715, + "step": 344 + }, + { + "epoch": 0.07, + "learning_rate": 5.984223703500654e-06, + "loss": 1.1075, + "step": 345 + }, + { + "epoch": 0.07, + "learning_rate": 5.984122824615229e-06, + "loss": 0.8161, + "step": 346 + }, + { + "epoch": 0.07, + "learning_rate": 5.984021625086017e-06, + "loss": 0.9664, + "step": 347 + }, + { + "epoch": 0.07, + "learning_rate": 5.983920104923895e-06, + "loss": 0.8845, + "step": 348 + }, + { + "epoch": 0.07, + "learning_rate": 5.98381826413977e-06, + "loss": 1.02, + "step": 349 + }, + { + "epoch": 0.07, + "learning_rate": 5.983716102744585e-06, + "loss": 1.0522, + "step": 350 + }, + { + "epoch": 0.07, + "learning_rate": 5.98361362074932e-06, + "loss": 1.1609, + "step": 351 + }, + { + "epoch": 0.07, + "learning_rate": 5.983510818164982e-06, + "loss": 1.0494, + "step": 352 + }, + { + "epoch": 0.07, + "learning_rate": 5.983407695002621e-06, + "loss": 1.0643, + "step": 353 + }, + { + "epoch": 0.07, + "learning_rate": 5.983304251273316e-06, + "loss": 1.0307, + "step": 354 + }, + { + "epoch": 0.07, + "learning_rate": 5.983200486988182e-06, + "loss": 1.0185, + "step": 355 + }, + { + "epoch": 0.07, + "learning_rate": 5.983096402158368e-06, + "loss": 0.9799, + "step": 356 + }, + { + "epoch": 0.07, + "learning_rate": 5.9829919967950585e-06, + "loss": 1.1228, + "step": 357 + }, + { + "epoch": 0.07, + "learning_rate": 5.982887270909472e-06, + "loss": 0.8885, + "step": 358 + }, + { + "epoch": 0.07, + "learning_rate": 5.982782224512861e-06, + "loss": 0.9031, + "step": 359 + }, + { + "epoch": 0.07, + "learning_rate": 5.9826768576165135e-06, + "loss": 1.0953, + "step": 360 + }, + { + "epoch": 0.08, + "learning_rate": 5.982571170231749e-06, + "loss": 1.0339, + "step": 361 + }, + { + "epoch": 0.08, + "learning_rate": 5.982465162369926e-06, + "loss": 0.9442, + "step": 362 + }, + { + "epoch": 0.08, + "learning_rate": 5.982358834042435e-06, + "loss": 0.8183, + "step": 363 + }, + { + "epoch": 0.08, + "learning_rate": 5.982252185260699e-06, + "loss": 0.9198, + "step": 364 + }, + { + "epoch": 0.08, + "learning_rate": 5.982145216036178e-06, + "loss": 1.1153, + "step": 365 + }, + { + "epoch": 0.08, + "learning_rate": 5.982037926380368e-06, + "loss": 1.1304, + "step": 366 + }, + { + "epoch": 0.08, + "learning_rate": 5.981930316304794e-06, + "loss": 0.9464, + "step": 367 + }, + { + "epoch": 0.08, + "learning_rate": 5.981822385821021e-06, + "loss": 1.2954, + "step": 368 + }, + { + "epoch": 0.08, + "learning_rate": 5.981714134940646e-06, + "loss": 1.0039, + "step": 369 + }, + { + "epoch": 0.08, + "learning_rate": 5.981605563675298e-06, + "loss": 1.1989, + "step": 370 + }, + { + "epoch": 0.08, + "learning_rate": 5.981496672036647e-06, + "loss": 1.0865, + "step": 371 + }, + { + "epoch": 0.08, + "learning_rate": 5.981387460036391e-06, + "loss": 0.8881, + "step": 372 + }, + { + "epoch": 0.08, + "learning_rate": 5.981277927686265e-06, + "loss": 1.0311, + "step": 373 + }, + { + "epoch": 0.08, + "learning_rate": 5.981168074998037e-06, + "loss": 0.8663, + "step": 374 + }, + { + "epoch": 0.08, + "learning_rate": 5.981057901983514e-06, + "loss": 0.9217, + "step": 375 + }, + { + "epoch": 0.08, + "learning_rate": 5.980947408654532e-06, + "loss": 1.1235, + "step": 376 + }, + { + "epoch": 0.08, + "learning_rate": 5.9808365950229625e-06, + "loss": 1.1253, + "step": 377 + }, + { + "epoch": 0.08, + "learning_rate": 5.980725461100715e-06, + "loss": 1.0498, + "step": 378 + }, + { + "epoch": 0.08, + "learning_rate": 5.980614006899728e-06, + "loss": 1.001, + "step": 379 + }, + { + "epoch": 0.08, + "learning_rate": 5.9805022324319786e-06, + "loss": 1.1628, + "step": 380 + }, + { + "epoch": 0.08, + "learning_rate": 5.980390137709478e-06, + "loss": 1.1906, + "step": 381 + }, + { + "epoch": 0.08, + "learning_rate": 5.98027772274427e-06, + "loss": 0.911, + "step": 382 + }, + { + "epoch": 0.08, + "learning_rate": 5.980164987548433e-06, + "loss": 0.8285, + "step": 383 + }, + { + "epoch": 0.08, + "learning_rate": 5.98005193213408e-06, + "loss": 1.0446, + "step": 384 + }, + { + "epoch": 0.08, + "learning_rate": 5.979938556513359e-06, + "loss": 1.0896, + "step": 385 + }, + { + "epoch": 0.08, + "learning_rate": 5.979824860698454e-06, + "loss": 1.1504, + "step": 386 + }, + { + "epoch": 0.08, + "learning_rate": 5.979710844701581e-06, + "loss": 1.2828, + "step": 387 + }, + { + "epoch": 0.08, + "learning_rate": 5.979596508534989e-06, + "loss": 0.9782, + "step": 388 + }, + { + "epoch": 0.08, + "learning_rate": 5.979481852210965e-06, + "loss": 0.9754, + "step": 389 + }, + { + "epoch": 0.08, + "learning_rate": 5.979366875741829e-06, + "loss": 1.1549, + "step": 390 + }, + { + "epoch": 0.08, + "learning_rate": 5.979251579139935e-06, + "loss": 0.9424, + "step": 391 + }, + { + "epoch": 0.08, + "learning_rate": 5.979135962417671e-06, + "loss": 1.0477, + "step": 392 + }, + { + "epoch": 0.08, + "learning_rate": 5.9790200255874605e-06, + "loss": 0.7897, + "step": 393 + }, + { + "epoch": 0.08, + "learning_rate": 5.978903768661761e-06, + "loss": 0.8454, + "step": 394 + }, + { + "epoch": 0.08, + "learning_rate": 5.978787191653064e-06, + "loss": 0.9438, + "step": 395 + }, + { + "epoch": 0.08, + "learning_rate": 5.978670294573896e-06, + "loss": 1.0034, + "step": 396 + }, + { + "epoch": 0.08, + "learning_rate": 5.978553077436817e-06, + "loss": 0.9945, + "step": 397 + }, + { + "epoch": 0.08, + "learning_rate": 5.978435540254422e-06, + "loss": 0.999, + "step": 398 + }, + { + "epoch": 0.08, + "learning_rate": 5.978317683039342e-06, + "loss": 0.8327, + "step": 399 + }, + { + "epoch": 0.08, + "learning_rate": 5.978199505804239e-06, + "loss": 0.8786, + "step": 400 + }, + { + "epoch": 0.08, + "learning_rate": 5.978081008561811e-06, + "loss": 1.1112, + "step": 401 + }, + { + "epoch": 0.08, + "learning_rate": 5.977962191324792e-06, + "loss": 0.9308, + "step": 402 + }, + { + "epoch": 0.08, + "learning_rate": 5.977843054105947e-06, + "loss": 0.975, + "step": 403 + }, + { + "epoch": 0.08, + "learning_rate": 5.9777235969180795e-06, + "loss": 0.979, + "step": 404 + }, + { + "epoch": 0.08, + "learning_rate": 5.9776038197740235e-06, + "loss": 0.8599, + "step": 405 + }, + { + "epoch": 0.08, + "learning_rate": 5.97748372268665e-06, + "loss": 0.9853, + "step": 406 + }, + { + "epoch": 0.08, + "learning_rate": 5.977363305668862e-06, + "loss": 0.8872, + "step": 407 + }, + { + "epoch": 0.08, + "learning_rate": 5.9772425687335995e-06, + "loss": 0.8615, + "step": 408 + }, + { + "epoch": 0.09, + "learning_rate": 5.9771215118938355e-06, + "loss": 0.9343, + "step": 409 + }, + { + "epoch": 0.09, + "learning_rate": 5.977000135162578e-06, + "loss": 1.0547, + "step": 410 + }, + { + "epoch": 0.09, + "learning_rate": 5.976878438552868e-06, + "loss": 1.1056, + "step": 411 + }, + { + "epoch": 0.09, + "learning_rate": 5.976756422077782e-06, + "loss": 0.9628, + "step": 412 + }, + { + "epoch": 0.09, + "learning_rate": 5.976634085750432e-06, + "loss": 1.3367, + "step": 413 + }, + { + "epoch": 0.09, + "learning_rate": 5.97651142958396e-06, + "loss": 1.096, + "step": 414 + }, + { + "epoch": 0.09, + "learning_rate": 5.976388453591548e-06, + "loss": 0.8851, + "step": 415 + }, + { + "epoch": 0.09, + "learning_rate": 5.9762651577864095e-06, + "loss": 1.003, + "step": 416 + }, + { + "epoch": 0.09, + "learning_rate": 5.976141542181792e-06, + "loss": 0.6459, + "step": 417 + }, + { + "epoch": 0.09, + "learning_rate": 5.976017606790979e-06, + "loss": 1.1279, + "step": 418 + }, + { + "epoch": 0.09, + "learning_rate": 5.9758933516272854e-06, + "loss": 1.3032, + "step": 419 + }, + { + "epoch": 0.09, + "learning_rate": 5.975768776704064e-06, + "loss": 0.8337, + "step": 420 + }, + { + "epoch": 0.09, + "learning_rate": 5.975643882034699e-06, + "loss": 1.1416, + "step": 421 + }, + { + "epoch": 0.09, + "learning_rate": 5.975518667632612e-06, + "loss": 1.1516, + "step": 422 + }, + { + "epoch": 0.09, + "learning_rate": 5.975393133511257e-06, + "loss": 0.7993, + "step": 423 + }, + { + "epoch": 0.09, + "learning_rate": 5.975267279684121e-06, + "loss": 1.057, + "step": 424 + }, + { + "epoch": 0.09, + "learning_rate": 5.975141106164729e-06, + "loss": 1.0412, + "step": 425 + }, + { + "epoch": 0.09, + "learning_rate": 5.975014612966637e-06, + "loss": 1.2189, + "step": 426 + }, + { + "epoch": 0.09, + "learning_rate": 5.974887800103437e-06, + "loss": 1.2125, + "step": 427 + }, + { + "epoch": 0.09, + "learning_rate": 5.974760667588756e-06, + "loss": 1.025, + "step": 428 + }, + { + "epoch": 0.09, + "learning_rate": 5.974633215436253e-06, + "loss": 1.0234, + "step": 429 + }, + { + "epoch": 0.09, + "learning_rate": 5.974505443659622e-06, + "loss": 1.0045, + "step": 430 + }, + { + "epoch": 0.09, + "learning_rate": 5.9743773522725955e-06, + "loss": 0.9311, + "step": 431 + }, + { + "epoch": 0.09, + "learning_rate": 5.974248941288934e-06, + "loss": 1.062, + "step": 432 + }, + { + "epoch": 0.09, + "learning_rate": 5.974120210722436e-06, + "loss": 0.9071, + "step": 433 + }, + { + "epoch": 0.09, + "learning_rate": 5.973991160586933e-06, + "loss": 0.9888, + "step": 434 + }, + { + "epoch": 0.09, + "learning_rate": 5.973861790896292e-06, + "loss": 0.9163, + "step": 435 + }, + { + "epoch": 0.09, + "learning_rate": 5.973732101664415e-06, + "loss": 0.9302, + "step": 436 + }, + { + "epoch": 0.09, + "learning_rate": 5.9736020929052345e-06, + "loss": 1.1172, + "step": 437 + }, + { + "epoch": 0.09, + "learning_rate": 5.973471764632722e-06, + "loss": 0.9883, + "step": 438 + }, + { + "epoch": 0.09, + "learning_rate": 5.97334111686088e-06, + "loss": 0.8618, + "step": 439 + }, + { + "epoch": 0.09, + "learning_rate": 5.973210149603748e-06, + "loss": 1.0525, + "step": 440 + }, + { + "epoch": 0.09, + "learning_rate": 5.973078862875397e-06, + "loss": 0.9544, + "step": 441 + }, + { + "epoch": 0.09, + "learning_rate": 5.972947256689934e-06, + "loss": 1.1121, + "step": 442 + }, + { + "epoch": 0.09, + "learning_rate": 5.972815331061502e-06, + "loss": 0.9561, + "step": 443 + }, + { + "epoch": 0.09, + "learning_rate": 5.9726830860042736e-06, + "loss": 0.8641, + "step": 444 + }, + { + "epoch": 0.09, + "learning_rate": 5.972550521532459e-06, + "loss": 1.0043, + "step": 445 + }, + { + "epoch": 0.09, + "learning_rate": 5.972417637660304e-06, + "loss": 0.9515, + "step": 446 + }, + { + "epoch": 0.09, + "learning_rate": 5.972284434402086e-06, + "loss": 1.0395, + "step": 447 + }, + { + "epoch": 0.09, + "learning_rate": 5.972150911772118e-06, + "loss": 0.9791, + "step": 448 + }, + { + "epoch": 0.09, + "learning_rate": 5.972017069784745e-06, + "loss": 1.0428, + "step": 449 + }, + { + "epoch": 0.09, + "learning_rate": 5.971882908454352e-06, + "loss": 0.829, + "step": 450 + }, + { + "epoch": 0.09, + "learning_rate": 5.971748427795351e-06, + "loss": 0.8857, + "step": 451 + }, + { + "epoch": 0.09, + "learning_rate": 5.971613627822194e-06, + "loss": 1.2787, + "step": 452 + }, + { + "epoch": 0.09, + "learning_rate": 5.971478508549365e-06, + "loss": 0.964, + "step": 453 + }, + { + "epoch": 0.09, + "learning_rate": 5.971343069991381e-06, + "loss": 0.8449, + "step": 454 + }, + { + "epoch": 0.09, + "learning_rate": 5.971207312162798e-06, + "loss": 0.988, + "step": 455 + }, + { + "epoch": 0.09, + "learning_rate": 5.971071235078201e-06, + "loss": 0.8777, + "step": 456 + }, + { + "epoch": 0.1, + "learning_rate": 5.970934838752211e-06, + "loss": 0.963, + "step": 457 + }, + { + "epoch": 0.1, + "learning_rate": 5.9707981231994845e-06, + "loss": 1.0039, + "step": 458 + }, + { + "epoch": 0.1, + "learning_rate": 5.970661088434712e-06, + "loss": 0.9266, + "step": 459 + }, + { + "epoch": 0.1, + "learning_rate": 5.970523734472618e-06, + "loss": 1.1094, + "step": 460 + }, + { + "epoch": 0.1, + "learning_rate": 5.97038606132796e-06, + "loss": 1.0275, + "step": 461 + }, + { + "epoch": 0.1, + "learning_rate": 5.970248069015532e-06, + "loss": 1.0736, + "step": 462 + }, + { + "epoch": 0.1, + "learning_rate": 5.970109757550161e-06, + "loss": 1.0078, + "step": 463 + }, + { + "epoch": 0.1, + "learning_rate": 5.9699711269467085e-06, + "loss": 0.9505, + "step": 464 + }, + { + "epoch": 0.1, + "learning_rate": 5.96983217722007e-06, + "loss": 0.7334, + "step": 465 + }, + { + "epoch": 0.1, + "learning_rate": 5.969692908385177e-06, + "loss": 1.1101, + "step": 466 + }, + { + "epoch": 0.1, + "learning_rate": 5.969553320456991e-06, + "loss": 1.0431, + "step": 467 + }, + { + "epoch": 0.1, + "learning_rate": 5.969413413450514e-06, + "loss": 0.9238, + "step": 468 + }, + { + "epoch": 0.1, + "learning_rate": 5.969273187380778e-06, + "loss": 0.979, + "step": 469 + }, + { + "epoch": 0.1, + "learning_rate": 5.969132642262849e-06, + "loss": 1.2238, + "step": 470 + }, + { + "epoch": 0.1, + "learning_rate": 5.968991778111829e-06, + "loss": 1.0796, + "step": 471 + }, + { + "epoch": 0.1, + "learning_rate": 5.968850594942856e-06, + "loss": 0.9291, + "step": 472 + }, + { + "epoch": 0.1, + "learning_rate": 5.968709092771097e-06, + "loss": 0.8568, + "step": 473 + }, + { + "epoch": 0.1, + "learning_rate": 5.968567271611759e-06, + "loss": 0.8486, + "step": 474 + }, + { + "epoch": 0.1, + "learning_rate": 5.968425131480078e-06, + "loss": 1.1037, + "step": 475 + }, + { + "epoch": 0.1, + "learning_rate": 5.968282672391329e-06, + "loss": 1.1142, + "step": 476 + }, + { + "epoch": 0.1, + "learning_rate": 5.968139894360819e-06, + "loss": 1.0361, + "step": 477 + }, + { + "epoch": 0.1, + "learning_rate": 5.967996797403889e-06, + "loss": 1.0687, + "step": 478 + }, + { + "epoch": 0.1, + "learning_rate": 5.9678533815359155e-06, + "loss": 0.9581, + "step": 479 + }, + { + "epoch": 0.1, + "learning_rate": 5.967709646772307e-06, + "loss": 1.1905, + "step": 480 + }, + { + "epoch": 0.1, + "learning_rate": 5.967565593128509e-06, + "loss": 0.9304, + "step": 481 + }, + { + "epoch": 0.1, + "learning_rate": 5.9674212206199995e-06, + "loss": 0.9152, + "step": 482 + }, + { + "epoch": 0.1, + "learning_rate": 5.9672765292622926e-06, + "loss": 0.9298, + "step": 483 + }, + { + "epoch": 0.1, + "learning_rate": 5.967131519070934e-06, + "loss": 1.2012, + "step": 484 + }, + { + "epoch": 0.1, + "learning_rate": 5.966986190061505e-06, + "loss": 1.1008, + "step": 485 + }, + { + "epoch": 0.1, + "learning_rate": 5.9668405422496215e-06, + "loss": 0.8475, + "step": 486 + }, + { + "epoch": 0.1, + "learning_rate": 5.966694575650933e-06, + "loss": 1.2126, + "step": 487 + }, + { + "epoch": 0.1, + "learning_rate": 5.966548290281125e-06, + "loss": 0.9447, + "step": 488 + }, + { + "epoch": 0.1, + "learning_rate": 5.966401686155915e-06, + "loss": 1.0535, + "step": 489 + }, + { + "epoch": 0.1, + "learning_rate": 5.966254763291055e-06, + "loss": 0.9571, + "step": 490 + }, + { + "epoch": 0.1, + "learning_rate": 5.966107521702333e-06, + "loss": 0.9626, + "step": 491 + }, + { + "epoch": 0.1, + "learning_rate": 5.965959961405569e-06, + "loss": 0.9257, + "step": 492 + }, + { + "epoch": 0.1, + "learning_rate": 5.965812082416618e-06, + "loss": 1.1311, + "step": 493 + }, + { + "epoch": 0.1, + "learning_rate": 5.965663884751372e-06, + "loss": 1.1409, + "step": 494 + }, + { + "epoch": 0.1, + "learning_rate": 5.9655153684257515e-06, + "loss": 1.1109, + "step": 495 + }, + { + "epoch": 0.1, + "learning_rate": 5.965366533455717e-06, + "loss": 1.0314, + "step": 496 + }, + { + "epoch": 0.1, + "learning_rate": 5.96521737985726e-06, + "loss": 1.0284, + "step": 497 + }, + { + "epoch": 0.1, + "learning_rate": 5.965067907646408e-06, + "loss": 0.9187, + "step": 498 + }, + { + "epoch": 0.1, + "learning_rate": 5.96491811683922e-06, + "loss": 0.9242, + "step": 499 + }, + { + "epoch": 0.1, + "learning_rate": 5.964768007451792e-06, + "loss": 0.8744, + "step": 500 + }, + { + "epoch": 0.1, + "learning_rate": 5.964617579500253e-06, + "loss": 1.0825, + "step": 501 + }, + { + "epoch": 0.1, + "learning_rate": 5.964466833000767e-06, + "loss": 0.8537, + "step": 502 + }, + { + "epoch": 0.1, + "learning_rate": 5.964315767969532e-06, + "loss": 1.0924, + "step": 503 + }, + { + "epoch": 0.1, + "learning_rate": 5.964164384422778e-06, + "loss": 1.2262, + "step": 504 + }, + { + "epoch": 0.11, + "learning_rate": 5.964012682376773e-06, + "loss": 0.9773, + "step": 505 + }, + { + "epoch": 0.11, + "learning_rate": 5.963860661847817e-06, + "loss": 1.1186, + "step": 506 + }, + { + "epoch": 0.11, + "learning_rate": 5.963708322852244e-06, + "loss": 1.021, + "step": 507 + }, + { + "epoch": 0.11, + "learning_rate": 5.963555665406423e-06, + "loss": 0.9731, + "step": 508 + }, + { + "epoch": 0.11, + "learning_rate": 5.963402689526757e-06, + "loss": 1.068, + "step": 509 + }, + { + "epoch": 0.11, + "learning_rate": 5.9632493952296845e-06, + "loss": 0.927, + "step": 510 + }, + { + "epoch": 0.11, + "learning_rate": 5.963095782531676e-06, + "loss": 1.0042, + "step": 511 + }, + { + "epoch": 0.11, + "learning_rate": 5.962941851449236e-06, + "loss": 0.9498, + "step": 512 + }, + { + "epoch": 0.11, + "learning_rate": 5.962787601998906e-06, + "loss": 0.8147, + "step": 513 + }, + { + "epoch": 0.11, + "learning_rate": 5.96263303419726e-06, + "loss": 0.9474, + "step": 514 + }, + { + "epoch": 0.11, + "learning_rate": 5.962478148060905e-06, + "loss": 0.9219, + "step": 515 + }, + { + "epoch": 0.11, + "learning_rate": 5.962322943606485e-06, + "loss": 1.0259, + "step": 516 + }, + { + "epoch": 0.11, + "learning_rate": 5.962167420850676e-06, + "loss": 1.0261, + "step": 517 + }, + { + "epoch": 0.11, + "learning_rate": 5.962011579810189e-06, + "loss": 0.943, + "step": 518 + }, + { + "epoch": 0.11, + "learning_rate": 5.96185542050177e-06, + "loss": 0.9979, + "step": 519 + }, + { + "epoch": 0.11, + "learning_rate": 5.961698942942196e-06, + "loss": 0.9773, + "step": 520 + }, + { + "epoch": 0.11, + "learning_rate": 5.961542147148282e-06, + "loss": 0.8481, + "step": 521 + }, + { + "epoch": 0.11, + "learning_rate": 5.961385033136876e-06, + "loss": 0.8482, + "step": 522 + }, + { + "epoch": 0.11, + "learning_rate": 5.961227600924859e-06, + "loss": 1.1898, + "step": 523 + }, + { + "epoch": 0.11, + "learning_rate": 5.961069850529149e-06, + "loss": 0.9799, + "step": 524 + }, + { + "epoch": 0.11, + "learning_rate": 5.960911781966694e-06, + "loss": 1.0628, + "step": 525 + }, + { + "epoch": 0.11, + "learning_rate": 5.960753395254479e-06, + "loss": 1.0569, + "step": 526 + }, + { + "epoch": 0.11, + "learning_rate": 5.960594690409524e-06, + "loss": 0.8862, + "step": 527 + }, + { + "epoch": 0.11, + "learning_rate": 5.96043566744888e-06, + "loss": 1.1338, + "step": 528 + }, + { + "epoch": 0.11, + "learning_rate": 5.9602763263896345e-06, + "loss": 0.908, + "step": 529 + }, + { + "epoch": 0.11, + "learning_rate": 5.960116667248909e-06, + "loss": 1.006, + "step": 530 + }, + { + "epoch": 0.11, + "learning_rate": 5.959956690043859e-06, + "loss": 1.2493, + "step": 531 + }, + { + "epoch": 0.11, + "learning_rate": 5.959796394791674e-06, + "loss": 0.9802, + "step": 532 + }, + { + "epoch": 0.11, + "learning_rate": 5.959635781509578e-06, + "loss": 0.9829, + "step": 533 + }, + { + "epoch": 0.11, + "learning_rate": 5.959474850214828e-06, + "loss": 0.8838, + "step": 534 + }, + { + "epoch": 0.11, + "learning_rate": 5.959313600924717e-06, + "loss": 0.9058, + "step": 535 + }, + { + "epoch": 0.11, + "learning_rate": 5.95915203365657e-06, + "loss": 1.1249, + "step": 536 + }, + { + "epoch": 0.11, + "learning_rate": 5.9589901484277485e-06, + "loss": 0.9173, + "step": 537 + }, + { + "epoch": 0.11, + "learning_rate": 5.958827945255647e-06, + "loss": 0.8104, + "step": 538 + }, + { + "epoch": 0.11, + "learning_rate": 5.958665424157693e-06, + "loss": 0.8825, + "step": 539 + }, + { + "epoch": 0.11, + "learning_rate": 5.958502585151351e-06, + "loss": 0.985, + "step": 540 + }, + { + "epoch": 0.11, + "learning_rate": 5.958339428254118e-06, + "loss": 1.1024, + "step": 541 + }, + { + "epoch": 0.11, + "learning_rate": 5.958175953483523e-06, + "loss": 0.9831, + "step": 542 + }, + { + "epoch": 0.11, + "learning_rate": 5.958012160857134e-06, + "loss": 0.9676, + "step": 543 + }, + { + "epoch": 0.11, + "learning_rate": 5.957848050392549e-06, + "loss": 1.0474, + "step": 544 + }, + { + "epoch": 0.11, + "learning_rate": 5.957683622107402e-06, + "loss": 0.9944, + "step": 545 + }, + { + "epoch": 0.11, + "learning_rate": 5.957518876019361e-06, + "loss": 1.0794, + "step": 546 + }, + { + "epoch": 0.11, + "learning_rate": 5.957353812146129e-06, + "loss": 0.7696, + "step": 547 + }, + { + "epoch": 0.11, + "learning_rate": 5.957188430505438e-06, + "loss": 1.2139, + "step": 548 + }, + { + "epoch": 0.11, + "learning_rate": 5.957022731115064e-06, + "loss": 0.8747, + "step": 549 + }, + { + "epoch": 0.11, + "learning_rate": 5.956856713992807e-06, + "loss": 1.101, + "step": 550 + }, + { + "epoch": 0.11, + "learning_rate": 5.956690379156508e-06, + "loss": 1.1761, + "step": 551 + }, + { + "epoch": 0.11, + "learning_rate": 5.9565237266240375e-06, + "loss": 0.8549, + "step": 552 + }, + { + "epoch": 0.12, + "learning_rate": 5.956356756413305e-06, + "loss": 0.9771, + "step": 553 + }, + { + "epoch": 0.12, + "learning_rate": 5.9561894685422495e-06, + "loss": 1.1107, + "step": 554 + }, + { + "epoch": 0.12, + "learning_rate": 5.956021863028845e-06, + "loss": 1.0797, + "step": 555 + }, + { + "epoch": 0.12, + "learning_rate": 5.955853939891105e-06, + "loss": 0.8847, + "step": 556 + }, + { + "epoch": 0.12, + "learning_rate": 5.955685699147068e-06, + "loss": 0.9926, + "step": 557 + }, + { + "epoch": 0.12, + "learning_rate": 5.955517140814814e-06, + "loss": 1.0678, + "step": 558 + }, + { + "epoch": 0.12, + "learning_rate": 5.955348264912455e-06, + "loss": 1.0061, + "step": 559 + }, + { + "epoch": 0.12, + "learning_rate": 5.955179071458135e-06, + "loss": 0.9567, + "step": 560 + }, + { + "epoch": 0.12, + "learning_rate": 5.955009560470034e-06, + "loss": 0.8493, + "step": 561 + }, + { + "epoch": 0.12, + "learning_rate": 5.954839731966368e-06, + "loss": 0.9829, + "step": 562 + }, + { + "epoch": 0.12, + "learning_rate": 5.954669585965383e-06, + "loss": 0.8007, + "step": 563 + }, + { + "epoch": 0.12, + "learning_rate": 5.954499122485362e-06, + "loss": 1.1774, + "step": 564 + }, + { + "epoch": 0.12, + "learning_rate": 5.954328341544621e-06, + "loss": 0.8259, + "step": 565 + }, + { + "epoch": 0.12, + "learning_rate": 5.954157243161511e-06, + "loss": 1.0526, + "step": 566 + }, + { + "epoch": 0.12, + "learning_rate": 5.953985827354415e-06, + "loss": 0.8831, + "step": 567 + }, + { + "epoch": 0.12, + "learning_rate": 5.953814094141754e-06, + "loss": 0.9272, + "step": 568 + }, + { + "epoch": 0.12, + "learning_rate": 5.9536420435419785e-06, + "loss": 1.1278, + "step": 569 + }, + { + "epoch": 0.12, + "learning_rate": 5.953469675573576e-06, + "loss": 0.8965, + "step": 570 + }, + { + "epoch": 0.12, + "learning_rate": 5.953296990255068e-06, + "loss": 0.9866, + "step": 571 + }, + { + "epoch": 0.12, + "learning_rate": 5.953123987605008e-06, + "loss": 0.99, + "step": 572 + }, + { + "epoch": 0.12, + "learning_rate": 5.952950667641988e-06, + "loss": 0.9218, + "step": 573 + }, + { + "epoch": 0.12, + "learning_rate": 5.952777030384629e-06, + "loss": 1.0053, + "step": 574 + }, + { + "epoch": 0.12, + "learning_rate": 5.952603075851588e-06, + "loss": 1.0152, + "step": 575 + }, + { + "epoch": 0.12, + "learning_rate": 5.952428804061558e-06, + "loss": 0.9785, + "step": 576 + }, + { + "epoch": 0.12, + "learning_rate": 5.952254215033263e-06, + "loss": 0.8592, + "step": 577 + }, + { + "epoch": 0.12, + "learning_rate": 5.952079308785465e-06, + "loss": 1.031, + "step": 578 + }, + { + "epoch": 0.12, + "learning_rate": 5.951904085336953e-06, + "loss": 1.194, + "step": 579 + }, + { + "epoch": 0.12, + "learning_rate": 5.95172854470656e-06, + "loss": 1.0446, + "step": 580 + }, + { + "epoch": 0.12, + "learning_rate": 5.951552686913145e-06, + "loss": 1.0007, + "step": 581 + }, + { + "epoch": 0.12, + "learning_rate": 5.951376511975604e-06, + "loss": 0.8843, + "step": 582 + }, + { + "epoch": 0.12, + "learning_rate": 5.951200019912868e-06, + "loss": 0.8538, + "step": 583 + }, + { + "epoch": 0.12, + "learning_rate": 5.9510232107438995e-06, + "loss": 1.0139, + "step": 584 + }, + { + "epoch": 0.12, + "learning_rate": 5.9508460844876984e-06, + "loss": 0.9429, + "step": 585 + }, + { + "epoch": 0.12, + "learning_rate": 5.950668641163296e-06, + "loss": 1.2218, + "step": 586 + }, + { + "epoch": 0.12, + "learning_rate": 5.950490880789758e-06, + "loss": 0.9773, + "step": 587 + }, + { + "epoch": 0.12, + "learning_rate": 5.950312803386186e-06, + "loss": 1.0143, + "step": 588 + }, + { + "epoch": 0.12, + "learning_rate": 5.950134408971714e-06, + "loss": 1.088, + "step": 589 + }, + { + "epoch": 0.12, + "learning_rate": 5.949955697565509e-06, + "loss": 1.0467, + "step": 590 + }, + { + "epoch": 0.12, + "learning_rate": 5.949776669186776e-06, + "loss": 0.8823, + "step": 591 + }, + { + "epoch": 0.12, + "learning_rate": 5.949597323854751e-06, + "loss": 1.0345, + "step": 592 + }, + { + "epoch": 0.12, + "learning_rate": 5.949417661588703e-06, + "loss": 1.2222, + "step": 593 + }, + { + "epoch": 0.12, + "learning_rate": 5.949237682407937e-06, + "loss": 0.7486, + "step": 594 + }, + { + "epoch": 0.12, + "learning_rate": 5.9490573863317935e-06, + "loss": 0.957, + "step": 595 + }, + { + "epoch": 0.12, + "learning_rate": 5.948876773379645e-06, + "loss": 0.9091, + "step": 596 + }, + { + "epoch": 0.12, + "learning_rate": 5.9486958435708966e-06, + "loss": 0.8557, + "step": 597 + }, + { + "epoch": 0.12, + "learning_rate": 5.9485145969249895e-06, + "loss": 1.1901, + "step": 598 + }, + { + "epoch": 0.12, + "learning_rate": 5.948333033461401e-06, + "loss": 0.9139, + "step": 599 + }, + { + "epoch": 0.12, + "learning_rate": 5.948151153199637e-06, + "loss": 0.9506, + "step": 600 + }, + { + "epoch": 0.12, + "learning_rate": 5.947968956159243e-06, + "loss": 1.0845, + "step": 601 + }, + { + "epoch": 0.13, + "learning_rate": 5.947786442359795e-06, + "loss": 1.0896, + "step": 602 + }, + { + "epoch": 0.13, + "learning_rate": 5.947603611820904e-06, + "loss": 1.0381, + "step": 603 + }, + { + "epoch": 0.13, + "learning_rate": 5.9474204645622144e-06, + "loss": 0.9002, + "step": 604 + }, + { + "epoch": 0.13, + "learning_rate": 5.947237000603407e-06, + "loss": 1.33, + "step": 605 + }, + { + "epoch": 0.13, + "learning_rate": 5.9470532199641946e-06, + "loss": 0.9552, + "step": 606 + }, + { + "epoch": 0.13, + "learning_rate": 5.946869122664323e-06, + "loss": 0.886, + "step": 607 + }, + { + "epoch": 0.13, + "learning_rate": 5.946684708723575e-06, + "loss": 0.8843, + "step": 608 + }, + { + "epoch": 0.13, + "learning_rate": 5.946499978161765e-06, + "loss": 0.9104, + "step": 609 + }, + { + "epoch": 0.13, + "learning_rate": 5.946314930998743e-06, + "loss": 1.1829, + "step": 610 + }, + { + "epoch": 0.13, + "learning_rate": 5.946129567254392e-06, + "loss": 0.9859, + "step": 611 + }, + { + "epoch": 0.13, + "learning_rate": 5.945943886948629e-06, + "loss": 1.0122, + "step": 612 + }, + { + "epoch": 0.13, + "learning_rate": 5.945757890101405e-06, + "loss": 0.9271, + "step": 613 + }, + { + "epoch": 0.13, + "learning_rate": 5.945571576732706e-06, + "loss": 0.9198, + "step": 614 + }, + { + "epoch": 0.13, + "learning_rate": 5.945384946862552e-06, + "loss": 0.9032, + "step": 615 + }, + { + "epoch": 0.13, + "learning_rate": 5.945198000510996e-06, + "loss": 1.1748, + "step": 616 + }, + { + "epoch": 0.13, + "learning_rate": 5.945010737698124e-06, + "loss": 0.9531, + "step": 617 + }, + { + "epoch": 0.13, + "learning_rate": 5.944823158444059e-06, + "loss": 0.9518, + "step": 618 + }, + { + "epoch": 0.13, + "learning_rate": 5.944635262768956e-06, + "loss": 0.8515, + "step": 619 + }, + { + "epoch": 0.13, + "learning_rate": 5.944447050693003e-06, + "loss": 0.9075, + "step": 620 + }, + { + "epoch": 0.13, + "learning_rate": 5.944258522236426e-06, + "loss": 1.2077, + "step": 621 + }, + { + "epoch": 0.13, + "learning_rate": 5.944069677419479e-06, + "loss": 0.7758, + "step": 622 + }, + { + "epoch": 0.13, + "learning_rate": 5.943880516262457e-06, + "loss": 0.901, + "step": 623 + }, + { + "epoch": 0.13, + "learning_rate": 5.9436910387856835e-06, + "loss": 0.8593, + "step": 624 + }, + { + "epoch": 0.13, + "learning_rate": 5.9435012450095165e-06, + "loss": 1.041, + "step": 625 + }, + { + "epoch": 0.13, + "learning_rate": 5.9433111349543515e-06, + "loss": 0.8667, + "step": 626 + }, + { + "epoch": 0.13, + "learning_rate": 5.943120708640615e-06, + "loss": 0.9762, + "step": 627 + }, + { + "epoch": 0.13, + "learning_rate": 5.942929966088769e-06, + "loss": 0.8367, + "step": 628 + }, + { + "epoch": 0.13, + "learning_rate": 5.942738907319308e-06, + "loss": 1.2601, + "step": 629 + }, + { + "epoch": 0.13, + "learning_rate": 5.942547532352761e-06, + "loss": 1.1113, + "step": 630 + }, + { + "epoch": 0.13, + "learning_rate": 5.942355841209692e-06, + "loss": 0.9258, + "step": 631 + }, + { + "epoch": 0.13, + "learning_rate": 5.942163833910697e-06, + "loss": 1.1866, + "step": 632 + }, + { + "epoch": 0.13, + "learning_rate": 5.941971510476409e-06, + "loss": 1.3457, + "step": 633 + }, + { + "epoch": 0.13, + "learning_rate": 5.941778870927491e-06, + "loss": 0.8497, + "step": 634 + }, + { + "epoch": 0.13, + "learning_rate": 5.941585915284644e-06, + "loss": 0.9485, + "step": 635 + }, + { + "epoch": 0.13, + "learning_rate": 5.941392643568601e-06, + "loss": 0.8543, + "step": 636 + }, + { + "epoch": 0.13, + "learning_rate": 5.941199055800127e-06, + "loss": 1.0481, + "step": 637 + }, + { + "epoch": 0.13, + "learning_rate": 5.941005152000025e-06, + "loss": 0.8876, + "step": 638 + }, + { + "epoch": 0.13, + "learning_rate": 5.940810932189129e-06, + "loss": 1.1715, + "step": 639 + }, + { + "epoch": 0.13, + "learning_rate": 5.940616396388309e-06, + "loss": 0.9594, + "step": 640 + }, + { + "epoch": 0.13, + "learning_rate": 5.940421544618466e-06, + "loss": 1.0175, + "step": 641 + }, + { + "epoch": 0.13, + "learning_rate": 5.940226376900539e-06, + "loss": 1.1028, + "step": 642 + }, + { + "epoch": 0.13, + "learning_rate": 5.9400308932554965e-06, + "loss": 1.4146, + "step": 643 + }, + { + "epoch": 0.13, + "learning_rate": 5.939835093704344e-06, + "loss": 0.7309, + "step": 644 + }, + { + "epoch": 0.13, + "learning_rate": 5.939638978268121e-06, + "loss": 1.001, + "step": 645 + }, + { + "epoch": 0.13, + "learning_rate": 5.9394425469679005e-06, + "loss": 0.8115, + "step": 646 + }, + { + "epoch": 0.13, + "learning_rate": 5.939245799824787e-06, + "loss": 0.8122, + "step": 647 + }, + { + "epoch": 0.13, + "learning_rate": 5.939048736859923e-06, + "loss": 0.8703, + "step": 648 + }, + { + "epoch": 0.13, + "learning_rate": 5.9388513580944806e-06, + "loss": 0.8944, + "step": 649 + }, + { + "epoch": 0.14, + "learning_rate": 5.93865366354967e-06, + "loss": 1.3751, + "step": 650 + }, + { + "epoch": 0.14, + "learning_rate": 5.938455653246733e-06, + "loss": 0.8435, + "step": 651 + }, + { + "epoch": 0.14, + "learning_rate": 5.938257327206946e-06, + "loss": 1.1464, + "step": 652 + }, + { + "epoch": 0.14, + "learning_rate": 5.938058685451618e-06, + "loss": 1.0687, + "step": 653 + }, + { + "epoch": 0.14, + "learning_rate": 5.9378597280020955e-06, + "loss": 0.9041, + "step": 654 + }, + { + "epoch": 0.14, + "learning_rate": 5.9376604548797545e-06, + "loss": 1.1421, + "step": 655 + }, + { + "epoch": 0.14, + "learning_rate": 5.937460866106007e-06, + "loss": 1.0545, + "step": 656 + }, + { + "epoch": 0.14, + "learning_rate": 5.937260961702299e-06, + "loss": 0.9779, + "step": 657 + }, + { + "epoch": 0.14, + "learning_rate": 5.937060741690109e-06, + "loss": 0.9814, + "step": 658 + }, + { + "epoch": 0.14, + "learning_rate": 5.936860206090953e-06, + "loss": 0.9644, + "step": 659 + }, + { + "epoch": 0.14, + "learning_rate": 5.936659354926378e-06, + "loss": 0.9857, + "step": 660 + }, + { + "epoch": 0.14, + "learning_rate": 5.936458188217964e-06, + "loss": 1.0309, + "step": 661 + }, + { + "epoch": 0.14, + "learning_rate": 5.936256705987327e-06, + "loss": 1.0658, + "step": 662 + }, + { + "epoch": 0.14, + "learning_rate": 5.9360549082561175e-06, + "loss": 0.8577, + "step": 663 + }, + { + "epoch": 0.14, + "learning_rate": 5.935852795046017e-06, + "loss": 1.1155, + "step": 664 + }, + { + "epoch": 0.14, + "learning_rate": 5.935650366378743e-06, + "loss": 0.9284, + "step": 665 + }, + { + "epoch": 0.14, + "learning_rate": 5.935447622276047e-06, + "loss": 0.8834, + "step": 666 + }, + { + "epoch": 0.14, + "learning_rate": 5.935244562759714e-06, + "loss": 1.0205, + "step": 667 + }, + { + "epoch": 0.14, + "learning_rate": 5.935041187851561e-06, + "loss": 0.9797, + "step": 668 + }, + { + "epoch": 0.14, + "learning_rate": 5.934837497573442e-06, + "loss": 0.9251, + "step": 669 + }, + { + "epoch": 0.14, + "learning_rate": 5.934633491947243e-06, + "loss": 0.9555, + "step": 670 + }, + { + "epoch": 0.14, + "learning_rate": 5.934429170994886e-06, + "loss": 1.0192, + "step": 671 + }, + { + "epoch": 0.14, + "learning_rate": 5.934224534738323e-06, + "loss": 0.8867, + "step": 672 + }, + { + "epoch": 0.14, + "learning_rate": 5.934019583199544e-06, + "loss": 0.9041, + "step": 673 + }, + { + "epoch": 0.14, + "learning_rate": 5.933814316400569e-06, + "loss": 0.8963, + "step": 674 + }, + { + "epoch": 0.14, + "learning_rate": 5.933608734363456e-06, + "loss": 1.1032, + "step": 675 + }, + { + "epoch": 0.14, + "learning_rate": 5.933402837110293e-06, + "loss": 0.8737, + "step": 676 + }, + { + "epoch": 0.14, + "learning_rate": 5.9331966246632054e-06, + "loss": 1.155, + "step": 677 + }, + { + "epoch": 0.14, + "learning_rate": 5.9329900970443495e-06, + "loss": 1.0075, + "step": 678 + }, + { + "epoch": 0.14, + "learning_rate": 5.9327832542759165e-06, + "loss": 1.0866, + "step": 679 + }, + { + "epoch": 0.14, + "learning_rate": 5.932576096380132e-06, + "loss": 0.7347, + "step": 680 + }, + { + "epoch": 0.14, + "learning_rate": 5.932368623379256e-06, + "loss": 1.1814, + "step": 681 + }, + { + "epoch": 0.14, + "learning_rate": 5.93216083529558e-06, + "loss": 0.887, + "step": 682 + }, + { + "epoch": 0.14, + "learning_rate": 5.931952732151432e-06, + "loss": 0.6585, + "step": 683 + }, + { + "epoch": 0.14, + "learning_rate": 5.931744313969172e-06, + "loss": 0.9091, + "step": 684 + }, + { + "epoch": 0.14, + "learning_rate": 5.9315355807711945e-06, + "loss": 0.8395, + "step": 685 + }, + { + "epoch": 0.14, + "learning_rate": 5.931326532579928e-06, + "loss": 1.2051, + "step": 686 + }, + { + "epoch": 0.14, + "learning_rate": 5.931117169417834e-06, + "loss": 1.2323, + "step": 687 + }, + { + "epoch": 0.14, + "learning_rate": 5.930907491307411e-06, + "loss": 1.1969, + "step": 688 + }, + { + "epoch": 0.14, + "learning_rate": 5.930697498271187e-06, + "loss": 1.0681, + "step": 689 + }, + { + "epoch": 0.14, + "learning_rate": 5.930487190331724e-06, + "loss": 1.2469, + "step": 690 + }, + { + "epoch": 0.14, + "learning_rate": 5.930276567511623e-06, + "loss": 0.8429, + "step": 691 + }, + { + "epoch": 0.14, + "learning_rate": 5.930065629833514e-06, + "loss": 1.0107, + "step": 692 + }, + { + "epoch": 0.14, + "learning_rate": 5.929854377320062e-06, + "loss": 0.6646, + "step": 693 + }, + { + "epoch": 0.14, + "learning_rate": 5.929642809993966e-06, + "loss": 1.2231, + "step": 694 + }, + { + "epoch": 0.14, + "learning_rate": 5.929430927877959e-06, + "loss": 0.9669, + "step": 695 + }, + { + "epoch": 0.14, + "learning_rate": 5.929218730994809e-06, + "loss": 0.9625, + "step": 696 + }, + { + "epoch": 0.14, + "learning_rate": 5.929006219367313e-06, + "loss": 1.0141, + "step": 697 + }, + { + "epoch": 0.15, + "learning_rate": 5.92879339301831e-06, + "loss": 0.9143, + "step": 698 + }, + { + "epoch": 0.15, + "learning_rate": 5.928580251970665e-06, + "loss": 1.2122, + "step": 699 + }, + { + "epoch": 0.15, + "learning_rate": 5.92836679624728e-06, + "loss": 0.9764, + "step": 700 + }, + { + "epoch": 0.15, + "learning_rate": 5.928153025871093e-06, + "loss": 0.9091, + "step": 701 + }, + { + "epoch": 0.15, + "learning_rate": 5.927938940865073e-06, + "loss": 1.0914, + "step": 702 + }, + { + "epoch": 0.15, + "learning_rate": 5.927724541252222e-06, + "loss": 0.9354, + "step": 703 + }, + { + "epoch": 0.15, + "learning_rate": 5.927509827055577e-06, + "loss": 1.0023, + "step": 704 + }, + { + "epoch": 0.15, + "learning_rate": 5.927294798298211e-06, + "loss": 1.1626, + "step": 705 + }, + { + "epoch": 0.15, + "learning_rate": 5.927079455003227e-06, + "loss": 1.1872, + "step": 706 + }, + { + "epoch": 0.15, + "learning_rate": 5.926863797193765e-06, + "loss": 1.106, + "step": 707 + }, + { + "epoch": 0.15, + "learning_rate": 5.926647824892996e-06, + "loss": 1.0565, + "step": 708 + }, + { + "epoch": 0.15, + "learning_rate": 5.926431538124129e-06, + "loss": 0.994, + "step": 709 + }, + { + "epoch": 0.15, + "learning_rate": 5.926214936910401e-06, + "loss": 1.2386, + "step": 710 + }, + { + "epoch": 0.15, + "learning_rate": 5.925998021275087e-06, + "loss": 0.9908, + "step": 711 + }, + { + "epoch": 0.15, + "learning_rate": 5.925780791241494e-06, + "loss": 1.1369, + "step": 712 + }, + { + "epoch": 0.15, + "learning_rate": 5.925563246832964e-06, + "loss": 0.8871, + "step": 713 + }, + { + "epoch": 0.15, + "learning_rate": 5.925345388072872e-06, + "loss": 0.9397, + "step": 714 + }, + { + "epoch": 0.15, + "learning_rate": 5.925127214984626e-06, + "loss": 1.0413, + "step": 715 + }, + { + "epoch": 0.15, + "learning_rate": 5.92490872759167e-06, + "loss": 1.0225, + "step": 716 + }, + { + "epoch": 0.15, + "learning_rate": 5.924689925917481e-06, + "loss": 0.8197, + "step": 717 + }, + { + "epoch": 0.15, + "learning_rate": 5.924470809985566e-06, + "loss": 0.9611, + "step": 718 + }, + { + "epoch": 0.15, + "learning_rate": 5.924251379819473e-06, + "loss": 0.912, + "step": 719 + }, + { + "epoch": 0.15, + "learning_rate": 5.924031635442776e-06, + "loss": 0.8961, + "step": 720 + }, + { + "epoch": 0.15, + "learning_rate": 5.92381157687909e-06, + "loss": 1.1176, + "step": 721 + }, + { + "epoch": 0.15, + "learning_rate": 5.9235912041520596e-06, + "loss": 1.0073, + "step": 722 + }, + { + "epoch": 0.15, + "learning_rate": 5.923370517285362e-06, + "loss": 1.1146, + "step": 723 + }, + { + "epoch": 0.15, + "learning_rate": 5.92314951630271e-06, + "loss": 0.9527, + "step": 724 + }, + { + "epoch": 0.15, + "learning_rate": 5.922928201227852e-06, + "loss": 0.9573, + "step": 725 + }, + { + "epoch": 0.15, + "learning_rate": 5.922706572084567e-06, + "loss": 0.8405, + "step": 726 + }, + { + "epoch": 0.15, + "learning_rate": 5.922484628896669e-06, + "loss": 1.1182, + "step": 727 + }, + { + "epoch": 0.15, + "learning_rate": 5.922262371688007e-06, + "loss": 1.1089, + "step": 728 + }, + { + "epoch": 0.15, + "learning_rate": 5.922039800482461e-06, + "loss": 0.8844, + "step": 729 + }, + { + "epoch": 0.15, + "learning_rate": 5.921816915303948e-06, + "loss": 0.937, + "step": 730 + }, + { + "epoch": 0.15, + "learning_rate": 5.921593716176414e-06, + "loss": 1.1841, + "step": 731 + }, + { + "epoch": 0.15, + "learning_rate": 5.921370203123845e-06, + "loss": 0.9058, + "step": 732 + }, + { + "epoch": 0.15, + "learning_rate": 5.921146376170257e-06, + "loss": 0.8316, + "step": 733 + }, + { + "epoch": 0.15, + "learning_rate": 5.920922235339697e-06, + "loss": 0.8517, + "step": 734 + }, + { + "epoch": 0.15, + "learning_rate": 5.920697780656253e-06, + "loss": 1.0168, + "step": 735 + }, + { + "epoch": 0.15, + "learning_rate": 5.920473012144041e-06, + "loss": 1.2233, + "step": 736 + }, + { + "epoch": 0.15, + "learning_rate": 5.920247929827212e-06, + "loss": 1.1049, + "step": 737 + }, + { + "epoch": 0.15, + "learning_rate": 5.920022533729951e-06, + "loss": 0.9317, + "step": 738 + }, + { + "epoch": 0.15, + "learning_rate": 5.919796823876477e-06, + "loss": 0.8863, + "step": 739 + }, + { + "epoch": 0.15, + "learning_rate": 5.919570800291042e-06, + "loss": 0.7835, + "step": 740 + }, + { + "epoch": 0.15, + "learning_rate": 5.919344462997934e-06, + "loss": 0.8907, + "step": 741 + }, + { + "epoch": 0.15, + "learning_rate": 5.919117812021472e-06, + "loss": 0.9031, + "step": 742 + }, + { + "epoch": 0.15, + "learning_rate": 5.918890847386009e-06, + "loss": 0.705, + "step": 743 + }, + { + "epoch": 0.15, + "learning_rate": 5.918663569115933e-06, + "loss": 0.9496, + "step": 744 + }, + { + "epoch": 0.15, + "learning_rate": 5.918435977235663e-06, + "loss": 0.908, + "step": 745 + }, + { + "epoch": 0.16, + "learning_rate": 5.918208071769656e-06, + "loss": 0.7387, + "step": 746 + }, + { + "epoch": 0.16, + "learning_rate": 5.917979852742402e-06, + "loss": 1.1984, + "step": 747 + }, + { + "epoch": 0.16, + "learning_rate": 5.91775132017842e-06, + "loss": 0.8959, + "step": 748 + }, + { + "epoch": 0.16, + "learning_rate": 5.917522474102266e-06, + "loss": 0.9789, + "step": 749 + }, + { + "epoch": 0.16, + "learning_rate": 5.917293314538531e-06, + "loss": 1.2681, + "step": 750 + }, + { + "epoch": 0.16, + "learning_rate": 5.9170638415118375e-06, + "loss": 0.9394, + "step": 751 + }, + { + "epoch": 0.16, + "learning_rate": 5.916834055046843e-06, + "loss": 0.9508, + "step": 752 + }, + { + "epoch": 0.16, + "learning_rate": 5.916603955168236e-06, + "loss": 1.0739, + "step": 753 + }, + { + "epoch": 0.16, + "learning_rate": 5.916373541900744e-06, + "loss": 0.9681, + "step": 754 + }, + { + "epoch": 0.16, + "learning_rate": 5.916142815269122e-06, + "loss": 0.8433, + "step": 755 + }, + { + "epoch": 0.16, + "learning_rate": 5.915911775298162e-06, + "loss": 0.9494, + "step": 756 + }, + { + "epoch": 0.16, + "learning_rate": 5.9156804220126906e-06, + "loss": 0.9702, + "step": 757 + }, + { + "epoch": 0.16, + "learning_rate": 5.9154487554375654e-06, + "loss": 1.0904, + "step": 758 + }, + { + "epoch": 0.16, + "learning_rate": 5.91521677559768e-06, + "loss": 1.1456, + "step": 759 + }, + { + "epoch": 0.16, + "learning_rate": 5.914984482517959e-06, + "loss": 0.9152, + "step": 760 + }, + { + "epoch": 0.16, + "learning_rate": 5.914751876223364e-06, + "loss": 0.915, + "step": 761 + }, + { + "epoch": 0.16, + "learning_rate": 5.9145189567388885e-06, + "loss": 1.0844, + "step": 762 + }, + { + "epoch": 0.16, + "learning_rate": 5.914285724089559e-06, + "loss": 0.9461, + "step": 763 + }, + { + "epoch": 0.16, + "learning_rate": 5.9140521783004365e-06, + "loss": 0.8494, + "step": 764 + }, + { + "epoch": 0.16, + "learning_rate": 5.913818319396615e-06, + "loss": 0.978, + "step": 765 + }, + { + "epoch": 0.16, + "learning_rate": 5.913584147403222e-06, + "loss": 1.0596, + "step": 766 + }, + { + "epoch": 0.16, + "learning_rate": 5.913349662345422e-06, + "loss": 0.9942, + "step": 767 + }, + { + "epoch": 0.16, + "learning_rate": 5.9131148642484076e-06, + "loss": 0.9661, + "step": 768 + }, + { + "epoch": 0.16, + "learning_rate": 5.912879753137409e-06, + "loss": 1.0946, + "step": 769 + }, + { + "epoch": 0.16, + "learning_rate": 5.91264432903769e-06, + "loss": 1.2015, + "step": 770 + }, + { + "epoch": 0.16, + "learning_rate": 5.912408591974544e-06, + "loss": 1.0376, + "step": 771 + }, + { + "epoch": 0.16, + "learning_rate": 5.9121725419733045e-06, + "loss": 0.8373, + "step": 772 + }, + { + "epoch": 0.16, + "learning_rate": 5.9119361790593306e-06, + "loss": 0.8354, + "step": 773 + }, + { + "epoch": 0.16, + "learning_rate": 5.911699503258024e-06, + "loss": 0.6637, + "step": 774 + }, + { + "epoch": 0.16, + "learning_rate": 5.911462514594813e-06, + "loss": 0.9489, + "step": 775 + }, + { + "epoch": 0.16, + "learning_rate": 5.911225213095163e-06, + "loss": 0.8387, + "step": 776 + }, + { + "epoch": 0.16, + "learning_rate": 5.910987598784571e-06, + "loss": 1.0826, + "step": 777 + }, + { + "epoch": 0.16, + "learning_rate": 5.910749671688569e-06, + "loss": 0.9352, + "step": 778 + }, + { + "epoch": 0.16, + "learning_rate": 5.910511431832723e-06, + "loss": 1.0935, + "step": 779 + }, + { + "epoch": 0.16, + "learning_rate": 5.910272879242631e-06, + "loss": 0.9465, + "step": 780 + }, + { + "epoch": 0.16, + "learning_rate": 5.9100340139439265e-06, + "loss": 0.9386, + "step": 781 + }, + { + "epoch": 0.16, + "learning_rate": 5.909794835962274e-06, + "loss": 1.0466, + "step": 782 + }, + { + "epoch": 0.16, + "learning_rate": 5.909555345323375e-06, + "loss": 1.0514, + "step": 783 + }, + { + "epoch": 0.16, + "learning_rate": 5.909315542052961e-06, + "loss": 0.9349, + "step": 784 + }, + { + "epoch": 0.16, + "learning_rate": 5.9090754261768e-06, + "loss": 0.8714, + "step": 785 + }, + { + "epoch": 0.16, + "learning_rate": 5.908834997720692e-06, + "loss": 0.8605, + "step": 786 + }, + { + "epoch": 0.16, + "learning_rate": 5.90859425671047e-06, + "loss": 0.9651, + "step": 787 + }, + { + "epoch": 0.16, + "learning_rate": 5.908353203172004e-06, + "loss": 1.0957, + "step": 788 + }, + { + "epoch": 0.16, + "learning_rate": 5.9081118371311926e-06, + "loss": 0.954, + "step": 789 + }, + { + "epoch": 0.16, + "learning_rate": 5.907870158613974e-06, + "loss": 0.8664, + "step": 790 + }, + { + "epoch": 0.16, + "learning_rate": 5.907628167646313e-06, + "loss": 0.9462, + "step": 791 + }, + { + "epoch": 0.16, + "learning_rate": 5.907385864254212e-06, + "loss": 1.0669, + "step": 792 + }, + { + "epoch": 0.16, + "learning_rate": 5.907143248463708e-06, + "loss": 1.05, + "step": 793 + }, + { + "epoch": 0.17, + "learning_rate": 5.90690032030087e-06, + "loss": 0.9395, + "step": 794 + }, + { + "epoch": 0.17, + "learning_rate": 5.906657079791799e-06, + "loss": 1.0652, + "step": 795 + }, + { + "epoch": 0.17, + "learning_rate": 5.9064135269626335e-06, + "loss": 0.9278, + "step": 796 + }, + { + "epoch": 0.17, + "learning_rate": 5.906169661839541e-06, + "loss": 0.9545, + "step": 797 + }, + { + "epoch": 0.17, + "learning_rate": 5.905925484448727e-06, + "loss": 1.0064, + "step": 798 + }, + { + "epoch": 0.17, + "learning_rate": 5.905680994816425e-06, + "loss": 0.9739, + "step": 799 + }, + { + "epoch": 0.17, + "learning_rate": 5.9054361929689085e-06, + "loss": 0.7654, + "step": 800 + }, + { + "epoch": 0.17, + "learning_rate": 5.905191078932481e-06, + "loss": 0.9001, + "step": 801 + }, + { + "epoch": 0.17, + "learning_rate": 5.904945652733478e-06, + "loss": 0.8613, + "step": 802 + }, + { + "epoch": 0.17, + "learning_rate": 5.904699914398272e-06, + "loss": 1.0806, + "step": 803 + }, + { + "epoch": 0.17, + "learning_rate": 5.904453863953269e-06, + "loss": 1.0699, + "step": 804 + }, + { + "epoch": 0.17, + "learning_rate": 5.904207501424905e-06, + "loss": 0.9156, + "step": 805 + }, + { + "epoch": 0.17, + "learning_rate": 5.903960826839652e-06, + "loss": 0.8991, + "step": 806 + }, + { + "epoch": 0.17, + "learning_rate": 5.903713840224014e-06, + "loss": 1.1375, + "step": 807 + }, + { + "epoch": 0.17, + "learning_rate": 5.903466541604533e-06, + "loss": 0.7076, + "step": 808 + }, + { + "epoch": 0.17, + "learning_rate": 5.903218931007779e-06, + "loss": 0.796, + "step": 809 + }, + { + "epoch": 0.17, + "learning_rate": 5.902971008460357e-06, + "loss": 0.9846, + "step": 810 + }, + { + "epoch": 0.17, + "learning_rate": 5.902722773988908e-06, + "loss": 1.2552, + "step": 811 + }, + { + "epoch": 0.17, + "learning_rate": 5.902474227620104e-06, + "loss": 1.0545, + "step": 812 + }, + { + "epoch": 0.17, + "learning_rate": 5.9022253693806515e-06, + "loss": 0.7086, + "step": 813 + }, + { + "epoch": 0.17, + "learning_rate": 5.90197619929729e-06, + "loss": 1.1663, + "step": 814 + }, + { + "epoch": 0.17, + "learning_rate": 5.9017267173967934e-06, + "loss": 0.8925, + "step": 815 + }, + { + "epoch": 0.17, + "learning_rate": 5.901476923705968e-06, + "loss": 0.9389, + "step": 816 + }, + { + "epoch": 0.17, + "learning_rate": 5.9012268182516535e-06, + "loss": 1.0029, + "step": 817 + }, + { + "epoch": 0.17, + "learning_rate": 5.900976401060724e-06, + "loss": 0.6915, + "step": 818 + }, + { + "epoch": 0.17, + "learning_rate": 5.9007256721600884e-06, + "loss": 0.9245, + "step": 819 + }, + { + "epoch": 0.17, + "learning_rate": 5.900474631576686e-06, + "loss": 1.0054, + "step": 820 + }, + { + "epoch": 0.17, + "learning_rate": 5.900223279337491e-06, + "loss": 1.1444, + "step": 821 + }, + { + "epoch": 0.17, + "learning_rate": 5.899971615469512e-06, + "loss": 1.1102, + "step": 822 + }, + { + "epoch": 0.17, + "learning_rate": 5.8997196399997905e-06, + "loss": 0.825, + "step": 823 + }, + { + "epoch": 0.17, + "learning_rate": 5.899467352955399e-06, + "loss": 1.071, + "step": 824 + }, + { + "epoch": 0.17, + "learning_rate": 5.899214754363449e-06, + "loss": 0.8469, + "step": 825 + }, + { + "epoch": 0.17, + "learning_rate": 5.89896184425108e-06, + "loss": 1.0754, + "step": 826 + }, + { + "epoch": 0.17, + "learning_rate": 5.898708622645467e-06, + "loss": 0.9485, + "step": 827 + }, + { + "epoch": 0.17, + "learning_rate": 5.8984550895738195e-06, + "loss": 1.1359, + "step": 828 + }, + { + "epoch": 0.17, + "learning_rate": 5.89820124506338e-06, + "loss": 1.0604, + "step": 829 + }, + { + "epoch": 0.17, + "learning_rate": 5.897947089141424e-06, + "loss": 0.9478, + "step": 830 + }, + { + "epoch": 0.17, + "learning_rate": 5.897692621835258e-06, + "loss": 0.9235, + "step": 831 + }, + { + "epoch": 0.17, + "learning_rate": 5.897437843172229e-06, + "loss": 1.0296, + "step": 832 + }, + { + "epoch": 0.17, + "learning_rate": 5.897182753179709e-06, + "loss": 1.2143, + "step": 833 + }, + { + "epoch": 0.17, + "learning_rate": 5.896927351885109e-06, + "loss": 0.839, + "step": 834 + }, + { + "epoch": 0.17, + "learning_rate": 5.896671639315872e-06, + "loss": 1.2925, + "step": 835 + }, + { + "epoch": 0.17, + "learning_rate": 5.8964156154994746e-06, + "loss": 0.971, + "step": 836 + }, + { + "epoch": 0.17, + "learning_rate": 5.896159280463425e-06, + "loss": 0.9567, + "step": 837 + }, + { + "epoch": 0.17, + "learning_rate": 5.895902634235268e-06, + "loss": 1.0743, + "step": 838 + }, + { + "epoch": 0.17, + "learning_rate": 5.89564567684258e-06, + "loss": 1.1548, + "step": 839 + }, + { + "epoch": 0.17, + "learning_rate": 5.895388408312969e-06, + "loss": 0.8426, + "step": 840 + }, + { + "epoch": 0.17, + "learning_rate": 5.895130828674082e-06, + "loss": 1.0337, + "step": 841 + }, + { + "epoch": 0.18, + "learning_rate": 5.8948729379535936e-06, + "loss": 0.8381, + "step": 842 + }, + { + "epoch": 0.18, + "learning_rate": 5.894614736179214e-06, + "loss": 0.9898, + "step": 843 + }, + { + "epoch": 0.18, + "learning_rate": 5.894356223378688e-06, + "loss": 0.9441, + "step": 844 + }, + { + "epoch": 0.18, + "learning_rate": 5.894097399579792e-06, + "loss": 1.2999, + "step": 845 + }, + { + "epoch": 0.18, + "learning_rate": 5.893838264810338e-06, + "loss": 0.9017, + "step": 846 + }, + { + "epoch": 0.18, + "learning_rate": 5.893578819098167e-06, + "loss": 1.0466, + "step": 847 + }, + { + "epoch": 0.18, + "learning_rate": 5.893319062471159e-06, + "loss": 1.0647, + "step": 848 + }, + { + "epoch": 0.18, + "learning_rate": 5.893058994957226e-06, + "loss": 0.846, + "step": 849 + }, + { + "epoch": 0.18, + "learning_rate": 5.8927986165843085e-06, + "loss": 0.9282, + "step": 850 + }, + { + "epoch": 0.18, + "learning_rate": 5.892537927380386e-06, + "loss": 0.9183, + "step": 851 + }, + { + "epoch": 0.18, + "learning_rate": 5.892276927373471e-06, + "loss": 1.1419, + "step": 852 + }, + { + "epoch": 0.18, + "learning_rate": 5.892015616591606e-06, + "loss": 0.8593, + "step": 853 + }, + { + "epoch": 0.18, + "learning_rate": 5.891753995062869e-06, + "loss": 0.8307, + "step": 854 + }, + { + "epoch": 0.18, + "learning_rate": 5.891492062815372e-06, + "loss": 0.9788, + "step": 855 + }, + { + "epoch": 0.18, + "learning_rate": 5.891229819877258e-06, + "loss": 1.0037, + "step": 856 + }, + { + "epoch": 0.18, + "learning_rate": 5.890967266276708e-06, + "loss": 1.0128, + "step": 857 + }, + { + "epoch": 0.18, + "learning_rate": 5.8907044020419305e-06, + "loss": 0.8747, + "step": 858 + }, + { + "epoch": 0.18, + "learning_rate": 5.890441227201172e-06, + "loss": 1.1252, + "step": 859 + }, + { + "epoch": 0.18, + "learning_rate": 5.890177741782708e-06, + "loss": 0.7478, + "step": 860 + }, + { + "epoch": 0.18, + "learning_rate": 5.8899139458148535e-06, + "loss": 0.9022, + "step": 861 + }, + { + "epoch": 0.18, + "learning_rate": 5.889649839325952e-06, + "loss": 1.1451, + "step": 862 + }, + { + "epoch": 0.18, + "learning_rate": 5.8893854223443805e-06, + "loss": 1.0125, + "step": 863 + }, + { + "epoch": 0.18, + "learning_rate": 5.889120694898552e-06, + "loss": 0.9068, + "step": 864 + }, + { + "epoch": 0.18, + "learning_rate": 5.888855657016912e-06, + "loss": 0.8344, + "step": 865 + }, + { + "epoch": 0.18, + "learning_rate": 5.888590308727936e-06, + "loss": 0.8988, + "step": 866 + }, + { + "epoch": 0.18, + "learning_rate": 5.888324650060138e-06, + "loss": 1.1404, + "step": 867 + }, + { + "epoch": 0.18, + "learning_rate": 5.888058681042063e-06, + "loss": 0.7589, + "step": 868 + }, + { + "epoch": 0.18, + "learning_rate": 5.887792401702288e-06, + "loss": 1.0497, + "step": 869 + }, + { + "epoch": 0.18, + "learning_rate": 5.887525812069426e-06, + "loss": 0.9908, + "step": 870 + }, + { + "epoch": 0.18, + "learning_rate": 5.887258912172122e-06, + "loss": 1.0755, + "step": 871 + }, + { + "epoch": 0.18, + "learning_rate": 5.886991702039053e-06, + "loss": 0.9894, + "step": 872 + }, + { + "epoch": 0.18, + "learning_rate": 5.886724181698933e-06, + "loss": 0.9333, + "step": 873 + }, + { + "epoch": 0.18, + "learning_rate": 5.886456351180504e-06, + "loss": 0.7531, + "step": 874 + }, + { + "epoch": 0.18, + "learning_rate": 5.886188210512546e-06, + "loss": 0.8196, + "step": 875 + }, + { + "epoch": 0.18, + "learning_rate": 5.885919759723872e-06, + "loss": 0.9511, + "step": 876 + }, + { + "epoch": 0.18, + "learning_rate": 5.885650998843325e-06, + "loss": 0.8569, + "step": 877 + }, + { + "epoch": 0.18, + "learning_rate": 5.885381927899784e-06, + "loss": 1.0847, + "step": 878 + }, + { + "epoch": 0.18, + "learning_rate": 5.885112546922162e-06, + "loss": 0.9305, + "step": 879 + }, + { + "epoch": 0.18, + "learning_rate": 5.884842855939402e-06, + "loss": 1.1224, + "step": 880 + }, + { + "epoch": 0.18, + "learning_rate": 5.8845728549804825e-06, + "loss": 0.9977, + "step": 881 + }, + { + "epoch": 0.18, + "learning_rate": 5.884302544074416e-06, + "loss": 1.1268, + "step": 882 + }, + { + "epoch": 0.18, + "learning_rate": 5.884031923250247e-06, + "loss": 1.1065, + "step": 883 + }, + { + "epoch": 0.18, + "learning_rate": 5.883760992537053e-06, + "loss": 0.9434, + "step": 884 + }, + { + "epoch": 0.18, + "learning_rate": 5.8834897519639475e-06, + "loss": 0.8939, + "step": 885 + }, + { + "epoch": 0.18, + "learning_rate": 5.883218201560072e-06, + "loss": 0.8315, + "step": 886 + }, + { + "epoch": 0.18, + "learning_rate": 5.882946341354607e-06, + "loss": 0.9025, + "step": 887 + }, + { + "epoch": 0.18, + "learning_rate": 5.8826741713767645e-06, + "loss": 1.0897, + "step": 888 + }, + { + "epoch": 0.18, + "learning_rate": 5.882401691655787e-06, + "loss": 1.0116, + "step": 889 + }, + { + "epoch": 0.19, + "learning_rate": 5.8821289022209545e-06, + "loss": 0.8393, + "step": 890 + }, + { + "epoch": 0.19, + "learning_rate": 5.8818558031015764e-06, + "loss": 0.9722, + "step": 891 + }, + { + "epoch": 0.19, + "learning_rate": 5.881582394326998e-06, + "loss": 1.017, + "step": 892 + }, + { + "epoch": 0.19, + "learning_rate": 5.881308675926597e-06, + "loss": 1.1734, + "step": 893 + }, + { + "epoch": 0.19, + "learning_rate": 5.881034647929784e-06, + "loss": 0.8119, + "step": 894 + }, + { + "epoch": 0.19, + "learning_rate": 5.880760310366003e-06, + "loss": 0.8195, + "step": 895 + }, + { + "epoch": 0.19, + "learning_rate": 5.880485663264733e-06, + "loss": 1.1075, + "step": 896 + }, + { + "epoch": 0.19, + "learning_rate": 5.880210706655485e-06, + "loss": 1.0282, + "step": 897 + }, + { + "epoch": 0.19, + "learning_rate": 5.879935440567801e-06, + "loss": 1.0808, + "step": 898 + }, + { + "epoch": 0.19, + "learning_rate": 5.879659865031259e-06, + "loss": 0.7812, + "step": 899 + }, + { + "epoch": 0.19, + "learning_rate": 5.879383980075471e-06, + "loss": 1.2825, + "step": 900 + }, + { + "epoch": 0.19, + "learning_rate": 5.87910778573008e-06, + "loss": 0.9601, + "step": 901 + }, + { + "epoch": 0.19, + "learning_rate": 5.8788312820247614e-06, + "loss": 0.7489, + "step": 902 + }, + { + "epoch": 0.19, + "learning_rate": 5.878554468989228e-06, + "loss": 1.0556, + "step": 903 + }, + { + "epoch": 0.19, + "learning_rate": 5.878277346653222e-06, + "loss": 0.8035, + "step": 904 + }, + { + "epoch": 0.19, + "learning_rate": 5.877999915046522e-06, + "loss": 0.9809, + "step": 905 + }, + { + "epoch": 0.19, + "learning_rate": 5.877722174198934e-06, + "loss": 0.8646, + "step": 906 + }, + { + "epoch": 0.19, + "learning_rate": 5.877444124140305e-06, + "loss": 0.8263, + "step": 907 + }, + { + "epoch": 0.19, + "learning_rate": 5.8771657649005105e-06, + "loss": 1.0161, + "step": 908 + }, + { + "epoch": 0.19, + "learning_rate": 5.87688709650946e-06, + "loss": 1.115, + "step": 909 + }, + { + "epoch": 0.19, + "learning_rate": 5.876608118997095e-06, + "loss": 0.937, + "step": 910 + }, + { + "epoch": 0.19, + "learning_rate": 5.876328832393394e-06, + "loss": 0.9749, + "step": 911 + }, + { + "epoch": 0.19, + "learning_rate": 5.876049236728364e-06, + "loss": 0.8744, + "step": 912 + }, + { + "epoch": 0.19, + "learning_rate": 5.87576933203205e-06, + "loss": 0.9913, + "step": 913 + }, + { + "epoch": 0.19, + "learning_rate": 5.875489118334526e-06, + "loss": 1.0588, + "step": 914 + }, + { + "epoch": 0.19, + "learning_rate": 5.875208595665902e-06, + "loss": 1.0019, + "step": 915 + }, + { + "epoch": 0.19, + "learning_rate": 5.874927764056319e-06, + "loss": 0.9127, + "step": 916 + }, + { + "epoch": 0.19, + "learning_rate": 5.874646623535952e-06, + "loss": 1.0317, + "step": 917 + }, + { + "epoch": 0.19, + "learning_rate": 5.874365174135011e-06, + "loss": 1.0287, + "step": 918 + }, + { + "epoch": 0.19, + "learning_rate": 5.874083415883738e-06, + "loss": 0.903, + "step": 919 + }, + { + "epoch": 0.19, + "learning_rate": 5.873801348812407e-06, + "loss": 0.8883, + "step": 920 + }, + { + "epoch": 0.19, + "learning_rate": 5.873518972951326e-06, + "loss": 0.9026, + "step": 921 + }, + { + "epoch": 0.19, + "learning_rate": 5.873236288330836e-06, + "loss": 0.9646, + "step": 922 + }, + { + "epoch": 0.19, + "learning_rate": 5.872953294981312e-06, + "loss": 0.9352, + "step": 923 + }, + { + "epoch": 0.19, + "learning_rate": 5.872669992933161e-06, + "loss": 1.099, + "step": 924 + }, + { + "epoch": 0.19, + "learning_rate": 5.872386382216825e-06, + "loss": 0.9414, + "step": 925 + }, + { + "epoch": 0.19, + "learning_rate": 5.872102462862777e-06, + "loss": 0.9201, + "step": 926 + }, + { + "epoch": 0.19, + "learning_rate": 5.871818234901524e-06, + "loss": 0.9896, + "step": 927 + }, + { + "epoch": 0.19, + "learning_rate": 5.871533698363606e-06, + "loss": 0.9712, + "step": 928 + }, + { + "epoch": 0.19, + "learning_rate": 5.871248853279599e-06, + "loss": 0.9275, + "step": 929 + }, + { + "epoch": 0.19, + "learning_rate": 5.8709636996801055e-06, + "loss": 1.1458, + "step": 930 + }, + { + "epoch": 0.19, + "learning_rate": 5.870678237595768e-06, + "loss": 0.9368, + "step": 931 + }, + { + "epoch": 0.19, + "learning_rate": 5.87039246705726e-06, + "loss": 0.9559, + "step": 932 + }, + { + "epoch": 0.19, + "learning_rate": 5.870106388095285e-06, + "loss": 1.1755, + "step": 933 + }, + { + "epoch": 0.19, + "learning_rate": 5.869820000740584e-06, + "loss": 1.0806, + "step": 934 + }, + { + "epoch": 0.19, + "learning_rate": 5.869533305023928e-06, + "loss": 0.9337, + "step": 935 + }, + { + "epoch": 0.19, + "learning_rate": 5.869246300976124e-06, + "loss": 1.0708, + "step": 936 + }, + { + "epoch": 0.19, + "learning_rate": 5.868958988628008e-06, + "loss": 1.1363, + "step": 937 + }, + { + "epoch": 0.2, + "learning_rate": 5.868671368010455e-06, + "loss": 1.0252, + "step": 938 + }, + { + "epoch": 0.2, + "learning_rate": 5.868383439154368e-06, + "loss": 0.828, + "step": 939 + }, + { + "epoch": 0.2, + "learning_rate": 5.868095202090685e-06, + "loss": 0.9052, + "step": 940 + }, + { + "epoch": 0.2, + "learning_rate": 5.8678066568503766e-06, + "loss": 1.1019, + "step": 941 + }, + { + "epoch": 0.2, + "learning_rate": 5.867517803464449e-06, + "loss": 1.1023, + "step": 942 + }, + { + "epoch": 0.2, + "learning_rate": 5.8672286419639366e-06, + "loss": 0.8174, + "step": 943 + }, + { + "epoch": 0.2, + "learning_rate": 5.866939172379911e-06, + "loss": 1.0403, + "step": 944 + }, + { + "epoch": 0.2, + "learning_rate": 5.866649394743477e-06, + "loss": 1.0235, + "step": 945 + }, + { + "epoch": 0.2, + "learning_rate": 5.8663593090857705e-06, + "loss": 0.9484, + "step": 946 + }, + { + "epoch": 0.2, + "learning_rate": 5.866068915437959e-06, + "loss": 1.1613, + "step": 947 + }, + { + "epoch": 0.2, + "learning_rate": 5.865778213831249e-06, + "loss": 0.9908, + "step": 948 + }, + { + "epoch": 0.2, + "learning_rate": 5.865487204296873e-06, + "loss": 0.7886, + "step": 949 + }, + { + "epoch": 0.2, + "learning_rate": 5.865195886866101e-06, + "loss": 1.1963, + "step": 950 + }, + { + "epoch": 0.2, + "learning_rate": 5.8649042615702375e-06, + "loss": 1.0187, + "step": 951 + }, + { + "epoch": 0.2, + "learning_rate": 5.864612328440614e-06, + "loss": 1.1205, + "step": 952 + }, + { + "epoch": 0.2, + "learning_rate": 5.8643200875086005e-06, + "loss": 0.848, + "step": 953 + }, + { + "epoch": 0.2, + "learning_rate": 5.864027538805598e-06, + "loss": 1.2144, + "step": 954 + }, + { + "epoch": 0.2, + "learning_rate": 5.863734682363041e-06, + "loss": 0.8916, + "step": 955 + }, + { + "epoch": 0.2, + "learning_rate": 5.8634415182123966e-06, + "loss": 0.8845, + "step": 956 + }, + { + "epoch": 0.2, + "learning_rate": 5.863148046385165e-06, + "loss": 1.1424, + "step": 957 + }, + { + "epoch": 0.2, + "learning_rate": 5.862854266912881e-06, + "loss": 0.9103, + "step": 958 + }, + { + "epoch": 0.2, + "learning_rate": 5.86256017982711e-06, + "loss": 0.9764, + "step": 959 + }, + { + "epoch": 0.2, + "learning_rate": 5.862265785159451e-06, + "loss": 1.11, + "step": 960 + }, + { + "epoch": 0.2, + "learning_rate": 5.861971082941539e-06, + "loss": 0.9905, + "step": 961 + }, + { + "epoch": 0.2, + "learning_rate": 5.861676073205037e-06, + "loss": 1.325, + "step": 962 + }, + { + "epoch": 0.2, + "learning_rate": 5.861380755981647e-06, + "loss": 0.9543, + "step": 963 + }, + { + "epoch": 0.2, + "learning_rate": 5.861085131303097e-06, + "loss": 1.1177, + "step": 964 + }, + { + "epoch": 0.2, + "learning_rate": 5.860789199201155e-06, + "loss": 0.9323, + "step": 965 + }, + { + "epoch": 0.2, + "learning_rate": 5.860492959707618e-06, + "loss": 1.1, + "step": 966 + }, + { + "epoch": 0.2, + "learning_rate": 5.860196412854315e-06, + "loss": 0.9953, + "step": 967 + }, + { + "epoch": 0.2, + "learning_rate": 5.859899558673113e-06, + "loss": 1.0804, + "step": 968 + }, + { + "epoch": 0.2, + "learning_rate": 5.859602397195907e-06, + "loss": 1.0198, + "step": 969 + }, + { + "epoch": 0.2, + "learning_rate": 5.859304928454627e-06, + "loss": 0.9311, + "step": 970 + }, + { + "epoch": 0.2, + "learning_rate": 5.859007152481237e-06, + "loss": 1.0267, + "step": 971 + }, + { + "epoch": 0.2, + "learning_rate": 5.858709069307732e-06, + "loss": 0.9001, + "step": 972 + }, + { + "epoch": 0.2, + "learning_rate": 5.858410678966142e-06, + "loss": 0.8055, + "step": 973 + }, + { + "epoch": 0.2, + "learning_rate": 5.858111981488529e-06, + "loss": 0.7812, + "step": 974 + }, + { + "epoch": 0.2, + "learning_rate": 5.8578129769069865e-06, + "loss": 1.0499, + "step": 975 + }, + { + "epoch": 0.2, + "learning_rate": 5.857513665253644e-06, + "loss": 1.0347, + "step": 976 + }, + { + "epoch": 0.2, + "learning_rate": 5.857214046560663e-06, + "loss": 1.1178, + "step": 977 + }, + { + "epoch": 0.2, + "learning_rate": 5.856914120860235e-06, + "loss": 1.0785, + "step": 978 + }, + { + "epoch": 0.2, + "learning_rate": 5.856613888184589e-06, + "loss": 1.0662, + "step": 979 + }, + { + "epoch": 0.2, + "learning_rate": 5.856313348565986e-06, + "loss": 0.7728, + "step": 980 + }, + { + "epoch": 0.2, + "learning_rate": 5.856012502036715e-06, + "loss": 1.1729, + "step": 981 + }, + { + "epoch": 0.2, + "learning_rate": 5.855711348629106e-06, + "loss": 0.9995, + "step": 982 + }, + { + "epoch": 0.2, + "learning_rate": 5.855409888375516e-06, + "loss": 1.0877, + "step": 983 + }, + { + "epoch": 0.2, + "learning_rate": 5.855108121308338e-06, + "loss": 0.7771, + "step": 984 + }, + { + "epoch": 0.2, + "learning_rate": 5.8548060474599955e-06, + "loss": 1.0469, + "step": 985 + }, + { + "epoch": 0.21, + "learning_rate": 5.854503666862947e-06, + "loss": 0.9618, + "step": 986 + }, + { + "epoch": 0.21, + "learning_rate": 5.854200979549683e-06, + "loss": 1.0187, + "step": 987 + }, + { + "epoch": 0.21, + "learning_rate": 5.853897985552728e-06, + "loss": 0.8451, + "step": 988 + }, + { + "epoch": 0.21, + "learning_rate": 5.853594684904637e-06, + "loss": 1.1464, + "step": 989 + }, + { + "epoch": 0.21, + "learning_rate": 5.853291077638001e-06, + "loss": 1.0543, + "step": 990 + }, + { + "epoch": 0.21, + "learning_rate": 5.852987163785443e-06, + "loss": 0.9622, + "step": 991 + }, + { + "epoch": 0.21, + "learning_rate": 5.852682943379618e-06, + "loss": 0.9454, + "step": 992 + }, + { + "epoch": 0.21, + "learning_rate": 5.852378416453213e-06, + "loss": 1.0055, + "step": 993 + }, + { + "epoch": 0.21, + "learning_rate": 5.8520735830389515e-06, + "loss": 0.9348, + "step": 994 + }, + { + "epoch": 0.21, + "learning_rate": 5.8517684431695874e-06, + "loss": 1.1165, + "step": 995 + }, + { + "epoch": 0.21, + "learning_rate": 5.851462996877907e-06, + "loss": 0.9761, + "step": 996 + }, + { + "epoch": 0.21, + "learning_rate": 5.851157244196732e-06, + "loss": 1.0265, + "step": 997 + }, + { + "epoch": 0.21, + "learning_rate": 5.850851185158913e-06, + "loss": 0.8383, + "step": 998 + }, + { + "epoch": 0.21, + "learning_rate": 5.8505448197973386e-06, + "loss": 0.7758, + "step": 999 + }, + { + "epoch": 0.21, + "learning_rate": 5.850238148144927e-06, + "loss": 1.0241, + "step": 1000 + }, + { + "epoch": 0.21, + "eval_loss": NaN, + "eval_runtime": 15.0405, + "eval_samples_per_second": 352.05, + "eval_steps_per_second": 44.015, + "step": 1000 + }, + { + "epoch": 0.21, + "learning_rate": 5.84993117023463e-06, + "loss": 1.1191, + "step": 1001 + }, + { + "epoch": 0.21, + "learning_rate": 5.84962388609943e-06, + "loss": 0.9286, + "step": 1002 + }, + { + "epoch": 0.21, + "learning_rate": 5.849316295772349e-06, + "loss": 0.8805, + "step": 1003 + }, + { + "epoch": 0.21, + "learning_rate": 5.849008399286434e-06, + "loss": 0.8548, + "step": 1004 + }, + { + "epoch": 0.21, + "learning_rate": 5.848700196674771e-06, + "loss": 1.1708, + "step": 1005 + }, + { + "epoch": 0.21, + "learning_rate": 5.848391687970474e-06, + "loss": 0.9909, + "step": 1006 + }, + { + "epoch": 0.21, + "learning_rate": 5.848082873206694e-06, + "loss": 0.9793, + "step": 1007 + }, + { + "epoch": 0.21, + "learning_rate": 5.847773752416613e-06, + "loss": 0.9137, + "step": 1008 + }, + { + "epoch": 0.21, + "learning_rate": 5.847464325633445e-06, + "loss": 0.9185, + "step": 1009 + }, + { + "epoch": 0.21, + "learning_rate": 5.847154592890438e-06, + "loss": 1.0124, + "step": 1010 + }, + { + "epoch": 0.21, + "learning_rate": 5.846844554220874e-06, + "loss": 1.1031, + "step": 1011 + }, + { + "epoch": 0.21, + "learning_rate": 5.846534209658064e-06, + "loss": 0.9711, + "step": 1012 + }, + { + "epoch": 0.21, + "learning_rate": 5.846223559235359e-06, + "loss": 1.092, + "step": 1013 + }, + { + "epoch": 0.21, + "learning_rate": 5.8459126029861335e-06, + "loss": 1.0362, + "step": 1014 + }, + { + "epoch": 0.21, + "learning_rate": 5.845601340943803e-06, + "loss": 0.9165, + "step": 1015 + }, + { + "epoch": 0.21, + "learning_rate": 5.845289773141811e-06, + "loss": 0.7596, + "step": 1016 + }, + { + "epoch": 0.21, + "learning_rate": 5.844977899613636e-06, + "loss": 1.0927, + "step": 1017 + }, + { + "epoch": 0.21, + "learning_rate": 5.844665720392788e-06, + "loss": 0.8265, + "step": 1018 + }, + { + "epoch": 0.21, + "learning_rate": 5.844353235512812e-06, + "loss": 0.877, + "step": 1019 + }, + { + "epoch": 0.21, + "learning_rate": 5.844040445007284e-06, + "loss": 0.7953, + "step": 1020 + }, + { + "epoch": 0.21, + "learning_rate": 5.8437273489098135e-06, + "loss": 1.0665, + "step": 1021 + }, + { + "epoch": 0.21, + "learning_rate": 5.843413947254041e-06, + "loss": 0.9571, + "step": 1022 + }, + { + "epoch": 0.21, + "learning_rate": 5.843100240073644e-06, + "loss": 0.9639, + "step": 1023 + }, + { + "epoch": 0.21, + "learning_rate": 5.842786227402328e-06, + "loss": 0.8448, + "step": 1024 + }, + { + "epoch": 0.21, + "learning_rate": 5.842471909273836e-06, + "loss": 1.0678, + "step": 1025 + }, + { + "epoch": 0.21, + "learning_rate": 5.8421572857219386e-06, + "loss": 0.811, + "step": 1026 + }, + { + "epoch": 0.21, + "learning_rate": 5.841842356780446e-06, + "loss": 0.8327, + "step": 1027 + }, + { + "epoch": 0.21, + "learning_rate": 5.841527122483193e-06, + "loss": 0.8476, + "step": 1028 + }, + { + "epoch": 0.21, + "learning_rate": 5.841211582864053e-06, + "loss": 1.0481, + "step": 1029 + }, + { + "epoch": 0.21, + "learning_rate": 5.840895737956933e-06, + "loss": 1.0231, + "step": 1030 + }, + { + "epoch": 0.21, + "learning_rate": 5.840579587795768e-06, + "loss": 1.0444, + "step": 1031 + }, + { + "epoch": 0.21, + "learning_rate": 5.840263132414529e-06, + "loss": 1.0048, + "step": 1032 + }, + { + "epoch": 0.21, + "learning_rate": 5.839946371847219e-06, + "loss": 1.1287, + "step": 1033 + }, + { + "epoch": 0.22, + "learning_rate": 5.839629306127873e-06, + "loss": 1.0028, + "step": 1034 + }, + { + "epoch": 0.22, + "learning_rate": 5.839311935290563e-06, + "loss": 0.7991, + "step": 1035 + }, + { + "epoch": 0.22, + "learning_rate": 5.838994259369386e-06, + "loss": 0.9016, + "step": 1036 + }, + { + "epoch": 0.22, + "learning_rate": 5.838676278398479e-06, + "loss": 0.9892, + "step": 1037 + }, + { + "epoch": 0.22, + "learning_rate": 5.838357992412009e-06, + "loss": 0.8376, + "step": 1038 + }, + { + "epoch": 0.22, + "learning_rate": 5.838039401444175e-06, + "loss": 1.1115, + "step": 1039 + }, + { + "epoch": 0.22, + "learning_rate": 5.8377205055292094e-06, + "loss": 0.8653, + "step": 1040 + }, + { + "epoch": 0.22, + "learning_rate": 5.837401304701379e-06, + "loss": 1.0482, + "step": 1041 + }, + { + "epoch": 0.22, + "learning_rate": 5.83708179899498e-06, + "loss": 1.1358, + "step": 1042 + }, + { + "epoch": 0.22, + "learning_rate": 5.836761988444344e-06, + "loss": 0.8272, + "step": 1043 + }, + { + "epoch": 0.22, + "learning_rate": 5.8364418730838346e-06, + "loss": 0.8071, + "step": 1044 + }, + { + "epoch": 0.22, + "learning_rate": 5.836121452947849e-06, + "loss": 0.9094, + "step": 1045 + }, + { + "epoch": 0.22, + "learning_rate": 5.8358007280708155e-06, + "loss": 1.0294, + "step": 1046 + }, + { + "epoch": 0.22, + "learning_rate": 5.835479698487196e-06, + "loss": 0.9591, + "step": 1047 + }, + { + "epoch": 0.22, + "learning_rate": 5.8351583642314854e-06, + "loss": 0.9853, + "step": 1048 + }, + { + "epoch": 0.22, + "learning_rate": 5.8348367253382115e-06, + "loss": 1.1801, + "step": 1049 + }, + { + "epoch": 0.22, + "learning_rate": 5.834514781841933e-06, + "loss": 0.7398, + "step": 1050 + }, + { + "epoch": 0.22, + "learning_rate": 5.834192533777244e-06, + "loss": 0.9428, + "step": 1051 + }, + { + "epoch": 0.22, + "learning_rate": 5.8338699811787684e-06, + "loss": 0.8812, + "step": 1052 + }, + { + "epoch": 0.22, + "learning_rate": 5.833547124081166e-06, + "loss": 0.8458, + "step": 1053 + }, + { + "epoch": 0.22, + "learning_rate": 5.833223962519128e-06, + "loss": 0.947, + "step": 1054 + }, + { + "epoch": 0.22, + "learning_rate": 5.832900496527376e-06, + "loss": 0.8629, + "step": 1055 + }, + { + "epoch": 0.22, + "learning_rate": 5.83257672614067e-06, + "loss": 0.8939, + "step": 1056 + }, + { + "epoch": 0.22, + "learning_rate": 5.8322526513937944e-06, + "loss": 1.0234, + "step": 1057 + }, + { + "epoch": 0.22, + "learning_rate": 5.831928272321575e-06, + "loss": 1.0615, + "step": 1058 + }, + { + "epoch": 0.22, + "learning_rate": 5.831603588958865e-06, + "loss": 0.9492, + "step": 1059 + }, + { + "epoch": 0.22, + "learning_rate": 5.831278601340551e-06, + "loss": 0.8182, + "step": 1060 + }, + { + "epoch": 0.22, + "learning_rate": 5.8309533095015524e-06, + "loss": 0.8909, + "step": 1061 + }, + { + "epoch": 0.22, + "learning_rate": 5.830627713476824e-06, + "loss": 0.8554, + "step": 1062 + }, + { + "epoch": 0.22, + "learning_rate": 5.83030181330135e-06, + "loss": 0.9429, + "step": 1063 + }, + { + "epoch": 0.22, + "learning_rate": 5.829975609010147e-06, + "loss": 0.9133, + "step": 1064 + }, + { + "epoch": 0.22, + "learning_rate": 5.829649100638267e-06, + "loss": 0.856, + "step": 1065 + }, + { + "epoch": 0.22, + "learning_rate": 5.829322288220794e-06, + "loss": 0.9694, + "step": 1066 + }, + { + "epoch": 0.22, + "learning_rate": 5.828995171792842e-06, + "loss": 1.0192, + "step": 1067 + }, + { + "epoch": 0.22, + "learning_rate": 5.82866775138956e-06, + "loss": 1.3357, + "step": 1068 + }, + { + "epoch": 0.22, + "learning_rate": 5.828340027046132e-06, + "loss": 0.8133, + "step": 1069 + }, + { + "epoch": 0.22, + "learning_rate": 5.828011998797768e-06, + "loss": 0.9807, + "step": 1070 + }, + { + "epoch": 0.22, + "learning_rate": 5.8276836666797185e-06, + "loss": 1.3123, + "step": 1071 + }, + { + "epoch": 0.22, + "learning_rate": 5.827355030727259e-06, + "loss": 1.0884, + "step": 1072 + }, + { + "epoch": 0.22, + "learning_rate": 5.827026090975705e-06, + "loss": 1.0509, + "step": 1073 + }, + { + "epoch": 0.22, + "learning_rate": 5.826696847460397e-06, + "loss": 0.7968, + "step": 1074 + }, + { + "epoch": 0.22, + "learning_rate": 5.826367300216715e-06, + "loss": 0.9767, + "step": 1075 + }, + { + "epoch": 0.22, + "learning_rate": 5.826037449280069e-06, + "loss": 0.9132, + "step": 1076 + }, + { + "epoch": 0.22, + "learning_rate": 5.825707294685901e-06, + "loss": 0.9952, + "step": 1077 + }, + { + "epoch": 0.22, + "learning_rate": 5.825376836469685e-06, + "loss": 1.0592, + "step": 1078 + }, + { + "epoch": 0.22, + "learning_rate": 5.82504607466693e-06, + "loss": 1.088, + "step": 1079 + }, + { + "epoch": 0.22, + "learning_rate": 5.824715009313176e-06, + "loss": 1.0191, + "step": 1080 + }, + { + "epoch": 0.22, + "learning_rate": 5.8243836404439965e-06, + "loss": 0.8266, + "step": 1081 + }, + { + "epoch": 0.23, + "learning_rate": 5.824051968094995e-06, + "loss": 1.0313, + "step": 1082 + }, + { + "epoch": 0.23, + "learning_rate": 5.823719992301812e-06, + "loss": 0.9433, + "step": 1083 + }, + { + "epoch": 0.23, + "learning_rate": 5.8233877131001166e-06, + "loss": 1.0096, + "step": 1084 + }, + { + "epoch": 0.23, + "learning_rate": 5.8230551305256134e-06, + "loss": 0.893, + "step": 1085 + }, + { + "epoch": 0.23, + "learning_rate": 5.822722244614038e-06, + "loss": 1.0082, + "step": 1086 + }, + { + "epoch": 0.23, + "learning_rate": 5.8223890554011575e-06, + "loss": 1.1905, + "step": 1087 + }, + { + "epoch": 0.23, + "learning_rate": 5.8220555629227756e-06, + "loss": 0.8771, + "step": 1088 + }, + { + "epoch": 0.23, + "learning_rate": 5.821721767214725e-06, + "loss": 0.9725, + "step": 1089 + }, + { + "epoch": 0.23, + "learning_rate": 5.8213876683128715e-06, + "loss": 0.8936, + "step": 1090 + }, + { + "epoch": 0.23, + "learning_rate": 5.821053266253115e-06, + "loss": 0.9213, + "step": 1091 + }, + { + "epoch": 0.23, + "learning_rate": 5.820718561071386e-06, + "loss": 0.8797, + "step": 1092 + }, + { + "epoch": 0.23, + "learning_rate": 5.820383552803649e-06, + "loss": 0.9489, + "step": 1093 + }, + { + "epoch": 0.23, + "learning_rate": 5.820048241485899e-06, + "loss": 0.9349, + "step": 1094 + }, + { + "epoch": 0.23, + "learning_rate": 5.819712627154168e-06, + "loss": 0.9401, + "step": 1095 + }, + { + "epoch": 0.23, + "learning_rate": 5.819376709844516e-06, + "loss": 1.0598, + "step": 1096 + }, + { + "epoch": 0.23, + "learning_rate": 5.819040489593039e-06, + "loss": 1.0598, + "step": 1097 + }, + { + "epoch": 0.23, + "learning_rate": 5.81870396643586e-06, + "loss": 0.8915, + "step": 1098 + }, + { + "epoch": 0.23, + "learning_rate": 5.818367140409143e-06, + "loss": 1.0263, + "step": 1099 + }, + { + "epoch": 0.23, + "learning_rate": 5.818030011549078e-06, + "loss": 1.0763, + "step": 1100 + }, + { + "epoch": 0.23, + "learning_rate": 5.817692579891888e-06, + "loss": 1.0323, + "step": 1101 + }, + { + "epoch": 0.23, + "learning_rate": 5.817354845473833e-06, + "loss": 0.9518, + "step": 1102 + }, + { + "epoch": 0.23, + "learning_rate": 5.8170168083311996e-06, + "loss": 1.0452, + "step": 1103 + }, + { + "epoch": 0.23, + "learning_rate": 5.816678468500311e-06, + "loss": 0.8417, + "step": 1104 + }, + { + "epoch": 0.23, + "learning_rate": 5.8163398260175225e-06, + "loss": 0.7488, + "step": 1105 + }, + { + "epoch": 0.23, + "learning_rate": 5.816000880919221e-06, + "loss": 0.7844, + "step": 1106 + }, + { + "epoch": 0.23, + "learning_rate": 5.8156616332418255e-06, + "loss": 0.9908, + "step": 1107 + }, + { + "epoch": 0.23, + "learning_rate": 5.815322083021788e-06, + "loss": 0.9178, + "step": 1108 + }, + { + "epoch": 0.23, + "learning_rate": 5.814982230295594e-06, + "loss": 0.9423, + "step": 1109 + }, + { + "epoch": 0.23, + "learning_rate": 5.814642075099761e-06, + "loss": 0.8646, + "step": 1110 + }, + { + "epoch": 0.23, + "learning_rate": 5.814301617470836e-06, + "loss": 1.0985, + "step": 1111 + }, + { + "epoch": 0.23, + "learning_rate": 5.813960857445404e-06, + "loss": 1.0272, + "step": 1112 + }, + { + "epoch": 0.23, + "learning_rate": 5.813619795060079e-06, + "loss": 1.0584, + "step": 1113 + }, + { + "epoch": 0.23, + "learning_rate": 5.8132784303515074e-06, + "loss": 0.9315, + "step": 1114 + }, + { + "epoch": 0.23, + "learning_rate": 5.81293676335637e-06, + "loss": 1.0742, + "step": 1115 + }, + { + "epoch": 0.23, + "learning_rate": 5.812594794111377e-06, + "loss": 0.8728, + "step": 1116 + }, + { + "epoch": 0.23, + "learning_rate": 5.812252522653274e-06, + "loss": 1.0212, + "step": 1117 + }, + { + "epoch": 0.23, + "learning_rate": 5.8119099490188395e-06, + "loss": 1.4132, + "step": 1118 + }, + { + "epoch": 0.23, + "learning_rate": 5.8115670732448805e-06, + "loss": 0.8776, + "step": 1119 + }, + { + "epoch": 0.23, + "learning_rate": 5.81122389536824e-06, + "loss": 0.9742, + "step": 1120 + }, + { + "epoch": 0.23, + "learning_rate": 5.810880415425794e-06, + "loss": 1.2125, + "step": 1121 + }, + { + "epoch": 0.23, + "learning_rate": 5.810536633454447e-06, + "loss": 0.867, + "step": 1122 + }, + { + "epoch": 0.23, + "learning_rate": 5.8101925494911375e-06, + "loss": 1.2472, + "step": 1123 + }, + { + "epoch": 0.23, + "learning_rate": 5.809848163572842e-06, + "loss": 0.8118, + "step": 1124 + }, + { + "epoch": 0.23, + "learning_rate": 5.80950347573656e-06, + "loss": 1.2535, + "step": 1125 + }, + { + "epoch": 0.23, + "learning_rate": 5.809158486019331e-06, + "loss": 0.9405, + "step": 1126 + }, + { + "epoch": 0.23, + "learning_rate": 5.8088131944582215e-06, + "loss": 0.8118, + "step": 1127 + }, + { + "epoch": 0.23, + "learning_rate": 5.808467601090337e-06, + "loss": 1.2759, + "step": 1128 + }, + { + "epoch": 0.23, + "learning_rate": 5.808121705952808e-06, + "loss": 0.867, + "step": 1129 + }, + { + "epoch": 0.24, + "learning_rate": 5.807775509082801e-06, + "loss": 0.826, + "step": 1130 + }, + { + "epoch": 0.24, + "learning_rate": 5.807429010517517e-06, + "loss": 1.0415, + "step": 1131 + }, + { + "epoch": 0.24, + "learning_rate": 5.807082210294187e-06, + "loss": 1.0759, + "step": 1132 + }, + { + "epoch": 0.24, + "learning_rate": 5.806735108450073e-06, + "loss": 1.1105, + "step": 1133 + }, + { + "epoch": 0.24, + "learning_rate": 5.806387705022472e-06, + "loss": 1.0285, + "step": 1134 + }, + { + "epoch": 0.24, + "learning_rate": 5.806040000048712e-06, + "loss": 1.0037, + "step": 1135 + }, + { + "epoch": 0.24, + "learning_rate": 5.805691993566155e-06, + "loss": 0.708, + "step": 1136 + }, + { + "epoch": 0.24, + "learning_rate": 5.805343685612192e-06, + "loss": 0.9751, + "step": 1137 + }, + { + "epoch": 0.24, + "learning_rate": 5.804995076224252e-06, + "loss": 0.9608, + "step": 1138 + }, + { + "epoch": 0.24, + "learning_rate": 5.8046461654397905e-06, + "loss": 0.8493, + "step": 1139 + }, + { + "epoch": 0.24, + "learning_rate": 5.804296953296299e-06, + "loss": 1.0242, + "step": 1140 + }, + { + "epoch": 0.24, + "learning_rate": 5.803947439831299e-06, + "loss": 0.947, + "step": 1141 + }, + { + "epoch": 0.24, + "learning_rate": 5.803597625082348e-06, + "loss": 1.0318, + "step": 1142 + }, + { + "epoch": 0.24, + "learning_rate": 5.80324750908703e-06, + "loss": 0.9022, + "step": 1143 + }, + { + "epoch": 0.24, + "learning_rate": 5.80289709188297e-06, + "loss": 0.8594, + "step": 1144 + }, + { + "epoch": 0.24, + "learning_rate": 5.802546373507815e-06, + "loss": 1.0755, + "step": 1145 + }, + { + "epoch": 0.24, + "learning_rate": 5.802195353999254e-06, + "loss": 0.9491, + "step": 1146 + }, + { + "epoch": 0.24, + "learning_rate": 5.801844033395002e-06, + "loss": 1.0117, + "step": 1147 + }, + { + "epoch": 0.24, + "learning_rate": 5.801492411732807e-06, + "loss": 0.919, + "step": 1148 + }, + { + "epoch": 0.24, + "learning_rate": 5.8011404890504536e-06, + "loss": 0.9064, + "step": 1149 + }, + { + "epoch": 0.24, + "learning_rate": 5.800788265385754e-06, + "loss": 1.1277, + "step": 1150 + }, + { + "epoch": 0.24, + "learning_rate": 5.800435740776555e-06, + "loss": 0.9631, + "step": 1151 + }, + { + "epoch": 0.24, + "learning_rate": 5.800082915260736e-06, + "loss": 1.1648, + "step": 1152 + }, + { + "epoch": 0.24, + "learning_rate": 5.799729788876207e-06, + "loss": 0.8331, + "step": 1153 + }, + { + "epoch": 0.24, + "learning_rate": 5.799376361660913e-06, + "loss": 0.8657, + "step": 1154 + }, + { + "epoch": 0.24, + "learning_rate": 5.799022633652828e-06, + "loss": 0.8781, + "step": 1155 + }, + { + "epoch": 0.24, + "learning_rate": 5.798668604889961e-06, + "loss": 0.8884, + "step": 1156 + }, + { + "epoch": 0.24, + "learning_rate": 5.798314275410351e-06, + "loss": 0.8026, + "step": 1157 + }, + { + "epoch": 0.24, + "learning_rate": 5.797959645252074e-06, + "loss": 0.8634, + "step": 1158 + }, + { + "epoch": 0.24, + "learning_rate": 5.7976047144532305e-06, + "loss": 0.8528, + "step": 1159 + }, + { + "epoch": 0.24, + "learning_rate": 5.7972494830519614e-06, + "loss": 1.106, + "step": 1160 + }, + { + "epoch": 0.24, + "learning_rate": 5.796893951086435e-06, + "loss": 1.2337, + "step": 1161 + }, + { + "epoch": 0.24, + "learning_rate": 5.796538118594852e-06, + "loss": 1.0184, + "step": 1162 + }, + { + "epoch": 0.24, + "learning_rate": 5.796181985615449e-06, + "loss": 0.9233, + "step": 1163 + }, + { + "epoch": 0.24, + "learning_rate": 5.79582555218649e-06, + "loss": 1.009, + "step": 1164 + }, + { + "epoch": 0.24, + "learning_rate": 5.795468818346276e-06, + "loss": 1.2614, + "step": 1165 + }, + { + "epoch": 0.24, + "learning_rate": 5.795111784133137e-06, + "loss": 0.9966, + "step": 1166 + }, + { + "epoch": 0.24, + "learning_rate": 5.7947544495854355e-06, + "loss": 1.0682, + "step": 1167 + }, + { + "epoch": 0.24, + "learning_rate": 5.794396814741568e-06, + "loss": 0.908, + "step": 1168 + }, + { + "epoch": 0.24, + "learning_rate": 5.794038879639962e-06, + "loss": 1.3163, + "step": 1169 + }, + { + "epoch": 0.24, + "learning_rate": 5.793680644319078e-06, + "loss": 0.8533, + "step": 1170 + }, + { + "epoch": 0.24, + "learning_rate": 5.793322108817408e-06, + "loss": 1.0113, + "step": 1171 + }, + { + "epoch": 0.24, + "learning_rate": 5.792963273173477e-06, + "loss": 0.9631, + "step": 1172 + }, + { + "epoch": 0.24, + "learning_rate": 5.792604137425841e-06, + "loss": 0.7872, + "step": 1173 + }, + { + "epoch": 0.24, + "learning_rate": 5.7922447016130905e-06, + "loss": 0.8505, + "step": 1174 + }, + { + "epoch": 0.24, + "learning_rate": 5.791884965773846e-06, + "loss": 0.9717, + "step": 1175 + }, + { + "epoch": 0.24, + "learning_rate": 5.79152492994676e-06, + "loss": 0.9567, + "step": 1176 + }, + { + "epoch": 0.24, + "learning_rate": 5.791164594170521e-06, + "loss": 1.1013, + "step": 1177 + }, + { + "epoch": 0.25, + "learning_rate": 5.7908039584838446e-06, + "loss": 1.0333, + "step": 1178 + }, + { + "epoch": 0.25, + "learning_rate": 5.790443022925483e-06, + "loss": 0.8177, + "step": 1179 + }, + { + "epoch": 0.25, + "learning_rate": 5.790081787534217e-06, + "loss": 0.9923, + "step": 1180 + }, + { + "epoch": 0.25, + "learning_rate": 5.7897202523488615e-06, + "loss": 1.0255, + "step": 1181 + }, + { + "epoch": 0.25, + "learning_rate": 5.789358417408264e-06, + "loss": 1.0225, + "step": 1182 + }, + { + "epoch": 0.25, + "learning_rate": 5.788996282751304e-06, + "loss": 0.9937, + "step": 1183 + }, + { + "epoch": 0.25, + "learning_rate": 5.788633848416893e-06, + "loss": 0.8712, + "step": 1184 + }, + { + "epoch": 0.25, + "learning_rate": 5.788271114443973e-06, + "loss": 0.986, + "step": 1185 + }, + { + "epoch": 0.25, + "learning_rate": 5.787908080871521e-06, + "loss": 1.0847, + "step": 1186 + }, + { + "epoch": 0.25, + "learning_rate": 5.787544747738546e-06, + "loss": 0.914, + "step": 1187 + }, + { + "epoch": 0.25, + "learning_rate": 5.787181115084085e-06, + "loss": 0.9976, + "step": 1188 + }, + { + "epoch": 0.25, + "learning_rate": 5.7868171829472125e-06, + "loss": 0.9926, + "step": 1189 + }, + { + "epoch": 0.25, + "learning_rate": 5.786452951367033e-06, + "loss": 1.0915, + "step": 1190 + }, + { + "epoch": 0.25, + "learning_rate": 5.786088420382682e-06, + "loss": 0.8889, + "step": 1191 + }, + { + "epoch": 0.25, + "learning_rate": 5.78572359003333e-06, + "loss": 0.8812, + "step": 1192 + }, + { + "epoch": 0.25, + "learning_rate": 5.7853584603581766e-06, + "loss": 1.2079, + "step": 1193 + }, + { + "epoch": 0.25, + "learning_rate": 5.784993031396455e-06, + "loss": 0.8723, + "step": 1194 + }, + { + "epoch": 0.25, + "learning_rate": 5.7846273031874316e-06, + "loss": 0.9157, + "step": 1195 + }, + { + "epoch": 0.25, + "learning_rate": 5.784261275770403e-06, + "loss": 0.9028, + "step": 1196 + }, + { + "epoch": 0.25, + "learning_rate": 5.783894949184699e-06, + "loss": 0.7473, + "step": 1197 + }, + { + "epoch": 0.25, + "learning_rate": 5.783528323469681e-06, + "loss": 0.948, + "step": 1198 + }, + { + "epoch": 0.25, + "learning_rate": 5.783161398664743e-06, + "loss": 1.4668, + "step": 1199 + }, + { + "epoch": 0.25, + "learning_rate": 5.782794174809311e-06, + "loss": 0.9362, + "step": 1200 + }, + { + "epoch": 0.25, + "learning_rate": 5.782426651942845e-06, + "loss": 0.981, + "step": 1201 + }, + { + "epoch": 0.25, + "learning_rate": 5.782058830104833e-06, + "loss": 0.9583, + "step": 1202 + }, + { + "epoch": 0.25, + "learning_rate": 5.781690709334798e-06, + "loss": 1.0419, + "step": 1203 + }, + { + "epoch": 0.25, + "learning_rate": 5.781322289672296e-06, + "loss": 1.0876, + "step": 1204 + }, + { + "epoch": 0.25, + "learning_rate": 5.780953571156911e-06, + "loss": 1.0675, + "step": 1205 + }, + { + "epoch": 0.25, + "learning_rate": 5.780584553828264e-06, + "loss": 1.1144, + "step": 1206 + }, + { + "epoch": 0.25, + "learning_rate": 5.780215237726004e-06, + "loss": 0.9277, + "step": 1207 + }, + { + "epoch": 0.25, + "learning_rate": 5.779845622889816e-06, + "loss": 1.1704, + "step": 1208 + }, + { + "epoch": 0.25, + "learning_rate": 5.779475709359414e-06, + "loss": 1.0067, + "step": 1209 + }, + { + "epoch": 0.25, + "learning_rate": 5.779105497174546e-06, + "loss": 0.8787, + "step": 1210 + }, + { + "epoch": 0.25, + "learning_rate": 5.778734986374991e-06, + "loss": 0.9275, + "step": 1211 + }, + { + "epoch": 0.25, + "learning_rate": 5.778364177000559e-06, + "loss": 0.936, + "step": 1212 + }, + { + "epoch": 0.25, + "learning_rate": 5.777993069091094e-06, + "loss": 1.1035, + "step": 1213 + }, + { + "epoch": 0.25, + "learning_rate": 5.777621662686472e-06, + "loss": 0.9816, + "step": 1214 + }, + { + "epoch": 0.25, + "learning_rate": 5.777249957826602e-06, + "loss": 0.9697, + "step": 1215 + }, + { + "epoch": 0.25, + "learning_rate": 5.776877954551421e-06, + "loss": 0.9033, + "step": 1216 + }, + { + "epoch": 0.25, + "learning_rate": 5.776505652900902e-06, + "loss": 0.8793, + "step": 1217 + }, + { + "epoch": 0.25, + "learning_rate": 5.776133052915049e-06, + "loss": 1.1023, + "step": 1218 + }, + { + "epoch": 0.25, + "learning_rate": 5.775760154633897e-06, + "loss": 0.8613, + "step": 1219 + }, + { + "epoch": 0.25, + "learning_rate": 5.775386958097515e-06, + "loss": 0.9521, + "step": 1220 + }, + { + "epoch": 0.25, + "learning_rate": 5.775013463346003e-06, + "loss": 1.1608, + "step": 1221 + }, + { + "epoch": 0.25, + "learning_rate": 5.774639670419492e-06, + "loss": 0.8457, + "step": 1222 + }, + { + "epoch": 0.25, + "learning_rate": 5.7742655793581455e-06, + "loss": 1.0596, + "step": 1223 + }, + { + "epoch": 0.25, + "learning_rate": 5.773891190202161e-06, + "loss": 1.0445, + "step": 1224 + }, + { + "epoch": 0.25, + "learning_rate": 5.773516502991767e-06, + "loss": 0.8873, + "step": 1225 + }, + { + "epoch": 0.25, + "learning_rate": 5.773141517767222e-06, + "loss": 0.9935, + "step": 1226 + }, + { + "epoch": 0.26, + "learning_rate": 5.772766234568818e-06, + "loss": 1.1657, + "step": 1227 + }, + { + "epoch": 0.26, + "learning_rate": 5.772390653436881e-06, + "loss": 1.0227, + "step": 1228 + }, + { + "epoch": 0.26, + "learning_rate": 5.772014774411765e-06, + "loss": 0.875, + "step": 1229 + }, + { + "epoch": 0.26, + "learning_rate": 5.77163859753386e-06, + "loss": 0.8304, + "step": 1230 + }, + { + "epoch": 0.26, + "learning_rate": 5.771262122843586e-06, + "loss": 0.8788, + "step": 1231 + }, + { + "epoch": 0.26, + "learning_rate": 5.770885350381395e-06, + "loss": 0.83, + "step": 1232 + }, + { + "epoch": 0.26, + "learning_rate": 5.77050828018777e-06, + "loss": 0.7981, + "step": 1233 + }, + { + "epoch": 0.26, + "learning_rate": 5.770130912303227e-06, + "loss": 1.0351, + "step": 1234 + }, + { + "epoch": 0.26, + "learning_rate": 5.769753246768316e-06, + "loss": 0.959, + "step": 1235 + }, + { + "epoch": 0.26, + "learning_rate": 5.7693752836236166e-06, + "loss": 0.7759, + "step": 1236 + }, + { + "epoch": 0.26, + "learning_rate": 5.76899702290974e-06, + "loss": 1.1716, + "step": 1237 + }, + { + "epoch": 0.26, + "learning_rate": 5.768618464667332e-06, + "loss": 1.1607, + "step": 1238 + }, + { + "epoch": 0.26, + "learning_rate": 5.768239608937066e-06, + "loss": 1.1087, + "step": 1239 + }, + { + "epoch": 0.26, + "learning_rate": 5.767860455759653e-06, + "loss": 0.8451, + "step": 1240 + }, + { + "epoch": 0.26, + "learning_rate": 5.767481005175831e-06, + "loss": 1.0777, + "step": 1241 + }, + { + "epoch": 0.26, + "learning_rate": 5.767101257226372e-06, + "loss": 0.8386, + "step": 1242 + }, + { + "epoch": 0.26, + "learning_rate": 5.766721211952081e-06, + "loss": 0.8005, + "step": 1243 + }, + { + "epoch": 0.26, + "learning_rate": 5.766340869393793e-06, + "loss": 1.1504, + "step": 1244 + }, + { + "epoch": 0.26, + "learning_rate": 5.765960229592376e-06, + "loss": 0.9652, + "step": 1245 + }, + { + "epoch": 0.26, + "learning_rate": 5.76557929258873e-06, + "loss": 0.893, + "step": 1246 + }, + { + "epoch": 0.26, + "learning_rate": 5.765198058423786e-06, + "loss": 0.9775, + "step": 1247 + }, + { + "epoch": 0.26, + "learning_rate": 5.7648165271385074e-06, + "loss": 0.944, + "step": 1248 + }, + { + "epoch": 0.26, + "learning_rate": 5.764434698773891e-06, + "loss": 0.9917, + "step": 1249 + }, + { + "epoch": 0.26, + "learning_rate": 5.7640525733709625e-06, + "loss": 0.9783, + "step": 1250 + }, + { + "epoch": 0.26, + "learning_rate": 5.7636701509707835e-06, + "loss": 1.0032, + "step": 1251 + }, + { + "epoch": 0.26, + "learning_rate": 5.763287431614444e-06, + "loss": 0.9289, + "step": 1252 + }, + { + "epoch": 0.26, + "learning_rate": 5.762904415343066e-06, + "loss": 0.6659, + "step": 1253 + }, + { + "epoch": 0.26, + "learning_rate": 5.762521102197807e-06, + "loss": 1.0519, + "step": 1254 + }, + { + "epoch": 0.26, + "learning_rate": 5.762137492219852e-06, + "loss": 1.0459, + "step": 1255 + }, + { + "epoch": 0.26, + "learning_rate": 5.76175358545042e-06, + "loss": 0.9545, + "step": 1256 + }, + { + "epoch": 0.26, + "learning_rate": 5.761369381930763e-06, + "loss": 0.8369, + "step": 1257 + }, + { + "epoch": 0.26, + "learning_rate": 5.760984881702163e-06, + "loss": 1.1865, + "step": 1258 + }, + { + "epoch": 0.26, + "learning_rate": 5.760600084805934e-06, + "loss": 0.9856, + "step": 1259 + }, + { + "epoch": 0.26, + "learning_rate": 5.760214991283424e-06, + "loss": 0.7864, + "step": 1260 + }, + { + "epoch": 0.26, + "learning_rate": 5.759829601176009e-06, + "loss": 0.9418, + "step": 1261 + }, + { + "epoch": 0.26, + "learning_rate": 5.759443914525101e-06, + "loss": 0.8257, + "step": 1262 + }, + { + "epoch": 0.26, + "learning_rate": 5.759057931372141e-06, + "loss": 0.8736, + "step": 1263 + }, + { + "epoch": 0.26, + "learning_rate": 5.758671651758603e-06, + "loss": 1.1846, + "step": 1264 + }, + { + "epoch": 0.26, + "learning_rate": 5.758285075725993e-06, + "loss": 0.9357, + "step": 1265 + }, + { + "epoch": 0.26, + "learning_rate": 5.757898203315848e-06, + "loss": 1.132, + "step": 1266 + }, + { + "epoch": 0.26, + "learning_rate": 5.7575110345697384e-06, + "loss": 0.7936, + "step": 1267 + }, + { + "epoch": 0.26, + "learning_rate": 5.757123569529265e-06, + "loss": 0.9026, + "step": 1268 + }, + { + "epoch": 0.26, + "learning_rate": 5.756735808236061e-06, + "loss": 0.857, + "step": 1269 + }, + { + "epoch": 0.26, + "learning_rate": 5.756347750731791e-06, + "loss": 0.7675, + "step": 1270 + }, + { + "epoch": 0.26, + "learning_rate": 5.755959397058152e-06, + "loss": 1.0567, + "step": 1271 + }, + { + "epoch": 0.26, + "learning_rate": 5.7555707472568715e-06, + "loss": 0.8697, + "step": 1272 + }, + { + "epoch": 0.26, + "learning_rate": 5.755181801369711e-06, + "loss": 0.9349, + "step": 1273 + }, + { + "epoch": 0.26, + "learning_rate": 5.7547925594384625e-06, + "loss": 1.029, + "step": 1274 + }, + { + "epoch": 0.27, + "learning_rate": 5.754403021504951e-06, + "loss": 1.0046, + "step": 1275 + }, + { + "epoch": 0.27, + "learning_rate": 5.754013187611032e-06, + "loss": 0.8283, + "step": 1276 + }, + { + "epoch": 0.27, + "learning_rate": 5.7536230577985915e-06, + "loss": 1.0234, + "step": 1277 + }, + { + "epoch": 0.27, + "learning_rate": 5.753232632109549e-06, + "loss": 0.7838, + "step": 1278 + }, + { + "epoch": 0.27, + "learning_rate": 5.752841910585857e-06, + "loss": 1.1348, + "step": 1279 + }, + { + "epoch": 0.27, + "learning_rate": 5.7524508932695e-06, + "loss": 1.0224, + "step": 1280 + }, + { + "epoch": 0.27, + "learning_rate": 5.752059580202488e-06, + "loss": 0.8442, + "step": 1281 + }, + { + "epoch": 0.27, + "learning_rate": 5.7516679714268736e-06, + "loss": 0.8602, + "step": 1282 + }, + { + "epoch": 0.27, + "learning_rate": 5.75127606698473e-06, + "loss": 0.9216, + "step": 1283 + }, + { + "epoch": 0.27, + "learning_rate": 5.75088386691817e-06, + "loss": 1.0046, + "step": 1284 + }, + { + "epoch": 0.27, + "learning_rate": 5.750491371269334e-06, + "loss": 0.9962, + "step": 1285 + }, + { + "epoch": 0.27, + "learning_rate": 5.750098580080397e-06, + "loss": 0.7934, + "step": 1286 + }, + { + "epoch": 0.27, + "learning_rate": 5.749705493393564e-06, + "loss": 1.1718, + "step": 1287 + }, + { + "epoch": 0.27, + "learning_rate": 5.749312111251072e-06, + "loss": 1.0628, + "step": 1288 + }, + { + "epoch": 0.27, + "learning_rate": 5.748918433695189e-06, + "loss": 0.997, + "step": 1289 + }, + { + "epoch": 0.27, + "learning_rate": 5.7485244607682185e-06, + "loss": 1.0559, + "step": 1290 + }, + { + "epoch": 0.27, + "learning_rate": 5.748130192512489e-06, + "loss": 0.7938, + "step": 1291 + }, + { + "epoch": 0.27, + "learning_rate": 5.747735628970367e-06, + "loss": 1.2739, + "step": 1292 + }, + { + "epoch": 0.27, + "learning_rate": 5.747340770184247e-06, + "loss": 1.2369, + "step": 1293 + }, + { + "epoch": 0.27, + "learning_rate": 5.7469456161965585e-06, + "loss": 0.9868, + "step": 1294 + }, + { + "epoch": 0.27, + "learning_rate": 5.7465501670497585e-06, + "loss": 0.8985, + "step": 1295 + }, + { + "epoch": 0.27, + "learning_rate": 5.74615442278634e-06, + "loss": 0.6933, + "step": 1296 + }, + { + "epoch": 0.27, + "learning_rate": 5.745758383448824e-06, + "loss": 1.133, + "step": 1297 + }, + { + "epoch": 0.27, + "learning_rate": 5.745362049079766e-06, + "loss": 1.0458, + "step": 1298 + }, + { + "epoch": 0.27, + "learning_rate": 5.744965419721752e-06, + "loss": 0.9643, + "step": 1299 + }, + { + "epoch": 0.27, + "learning_rate": 5.744568495417398e-06, + "loss": 0.986, + "step": 1300 + }, + { + "epoch": 0.27, + "learning_rate": 5.7441712762093565e-06, + "loss": 0.7875, + "step": 1301 + }, + { + "epoch": 0.27, + "learning_rate": 5.743773762140307e-06, + "loss": 1.0358, + "step": 1302 + }, + { + "epoch": 0.27, + "learning_rate": 5.743375953252962e-06, + "loss": 1.1892, + "step": 1303 + }, + { + "epoch": 0.27, + "learning_rate": 5.742977849590067e-06, + "loss": 0.7553, + "step": 1304 + }, + { + "epoch": 0.27, + "learning_rate": 5.742579451194398e-06, + "loss": 1.0576, + "step": 1305 + }, + { + "epoch": 0.27, + "learning_rate": 5.742180758108762e-06, + "loss": 1.2428, + "step": 1306 + }, + { + "epoch": 0.27, + "learning_rate": 5.741781770376e-06, + "loss": 0.8215, + "step": 1307 + }, + { + "epoch": 0.27, + "learning_rate": 5.741382488038982e-06, + "loss": 0.8812, + "step": 1308 + }, + { + "epoch": 0.27, + "learning_rate": 5.740982911140611e-06, + "loss": 1.0587, + "step": 1309 + }, + { + "epoch": 0.27, + "learning_rate": 5.740583039723822e-06, + "loss": 1.1087, + "step": 1310 + }, + { + "epoch": 0.27, + "learning_rate": 5.740182873831581e-06, + "loss": 0.9946, + "step": 1311 + }, + { + "epoch": 0.27, + "learning_rate": 5.739782413506886e-06, + "loss": 0.9835, + "step": 1312 + }, + { + "epoch": 0.27, + "learning_rate": 5.739381658792767e-06, + "loss": 0.9301, + "step": 1313 + }, + { + "epoch": 0.27, + "learning_rate": 5.738980609732283e-06, + "loss": 0.8302, + "step": 1314 + }, + { + "epoch": 0.27, + "learning_rate": 5.738579266368528e-06, + "loss": 0.886, + "step": 1315 + }, + { + "epoch": 0.27, + "learning_rate": 5.738177628744626e-06, + "loss": 0.922, + "step": 1316 + }, + { + "epoch": 0.27, + "learning_rate": 5.737775696903734e-06, + "loss": 1.0018, + "step": 1317 + }, + { + "epoch": 0.27, + "learning_rate": 5.737373470889037e-06, + "loss": 1.0193, + "step": 1318 + }, + { + "epoch": 0.27, + "learning_rate": 5.736970950743758e-06, + "loss": 0.8838, + "step": 1319 + }, + { + "epoch": 0.27, + "learning_rate": 5.736568136511145e-06, + "loss": 1.16, + "step": 1320 + }, + { + "epoch": 0.27, + "learning_rate": 5.736165028234481e-06, + "loss": 1.0633, + "step": 1321 + }, + { + "epoch": 0.27, + "learning_rate": 5.73576162595708e-06, + "loss": 0.7789, + "step": 1322 + }, + { + "epoch": 0.28, + "learning_rate": 5.735357929722287e-06, + "loss": 1.013, + "step": 1323 + }, + { + "epoch": 0.28, + "learning_rate": 5.73495393957348e-06, + "loss": 0.7331, + "step": 1324 + }, + { + "epoch": 0.28, + "learning_rate": 5.734549655554067e-06, + "loss": 0.9179, + "step": 1325 + }, + { + "epoch": 0.28, + "learning_rate": 5.734145077707489e-06, + "loss": 0.905, + "step": 1326 + }, + { + "epoch": 0.28, + "learning_rate": 5.7337402060772174e-06, + "loss": 0.9427, + "step": 1327 + }, + { + "epoch": 0.28, + "learning_rate": 5.733335040706756e-06, + "loss": 0.9035, + "step": 1328 + }, + { + "epoch": 0.28, + "learning_rate": 5.73292958163964e-06, + "loss": 1.0931, + "step": 1329 + }, + { + "epoch": 0.28, + "learning_rate": 5.732523828919435e-06, + "loss": 1.2399, + "step": 1330 + }, + { + "epoch": 0.28, + "learning_rate": 5.732117782589739e-06, + "loss": 0.9637, + "step": 1331 + }, + { + "epoch": 0.28, + "learning_rate": 5.731711442694183e-06, + "loss": 1.1295, + "step": 1332 + }, + { + "epoch": 0.28, + "learning_rate": 5.731304809276426e-06, + "loss": 0.9329, + "step": 1333 + }, + { + "epoch": 0.28, + "learning_rate": 5.730897882380163e-06, + "loss": 0.8749, + "step": 1334 + }, + { + "epoch": 0.28, + "learning_rate": 5.7304906620491176e-06, + "loss": 0.8793, + "step": 1335 + }, + { + "epoch": 0.28, + "learning_rate": 5.730083148327045e-06, + "loss": 0.9065, + "step": 1336 + }, + { + "epoch": 0.28, + "learning_rate": 5.729675341257734e-06, + "loss": 0.9268, + "step": 1337 + }, + { + "epoch": 0.28, + "learning_rate": 5.729267240885002e-06, + "loss": 1.0619, + "step": 1338 + }, + { + "epoch": 0.28, + "learning_rate": 5.728858847252699e-06, + "loss": 1.0314, + "step": 1339 + }, + { + "epoch": 0.28, + "learning_rate": 5.728450160404708e-06, + "loss": 0.8773, + "step": 1340 + }, + { + "epoch": 0.28, + "learning_rate": 5.7280411803849425e-06, + "loss": 1.0221, + "step": 1341 + }, + { + "epoch": 0.28, + "learning_rate": 5.727631907237346e-06, + "loss": 0.9704, + "step": 1342 + }, + { + "epoch": 0.28, + "learning_rate": 5.727222341005896e-06, + "loss": 0.953, + "step": 1343 + }, + { + "epoch": 0.28, + "learning_rate": 5.7268124817346e-06, + "loss": 0.8026, + "step": 1344 + }, + { + "epoch": 0.28, + "learning_rate": 5.726402329467497e-06, + "loss": 1.0338, + "step": 1345 + }, + { + "epoch": 0.28, + "learning_rate": 5.72599188424866e-06, + "loss": 1.1201, + "step": 1346 + }, + { + "epoch": 0.28, + "learning_rate": 5.725581146122188e-06, + "loss": 0.829, + "step": 1347 + }, + { + "epoch": 0.28, + "learning_rate": 5.725170115132216e-06, + "loss": 1.1731, + "step": 1348 + }, + { + "epoch": 0.28, + "learning_rate": 5.72475879132291e-06, + "loss": 1.0117, + "step": 1349 + }, + { + "epoch": 0.28, + "learning_rate": 5.724347174738467e-06, + "loss": 1.0752, + "step": 1350 + }, + { + "epoch": 0.28, + "learning_rate": 5.723935265423114e-06, + "loss": 1.0349, + "step": 1351 + }, + { + "epoch": 0.28, + "learning_rate": 5.723523063421111e-06, + "loss": 1.0779, + "step": 1352 + }, + { + "epoch": 0.28, + "learning_rate": 5.723110568776749e-06, + "loss": 1.0484, + "step": 1353 + }, + { + "epoch": 0.28, + "learning_rate": 5.7226977815343505e-06, + "loss": 0.9359, + "step": 1354 + }, + { + "epoch": 0.28, + "learning_rate": 5.722284701738271e-06, + "loss": 1.0503, + "step": 1355 + }, + { + "epoch": 0.28, + "learning_rate": 5.721871329432894e-06, + "loss": 1.0998, + "step": 1356 + }, + { + "epoch": 0.28, + "learning_rate": 5.7214576646626355e-06, + "loss": 0.8949, + "step": 1357 + }, + { + "epoch": 0.28, + "learning_rate": 5.7210437074719475e-06, + "loss": 0.9159, + "step": 1358 + }, + { + "epoch": 0.28, + "learning_rate": 5.720629457905305e-06, + "loss": 0.9796, + "step": 1359 + }, + { + "epoch": 0.28, + "learning_rate": 5.720214916007223e-06, + "loss": 1.0845, + "step": 1360 + }, + { + "epoch": 0.28, + "learning_rate": 5.719800081822242e-06, + "loss": 0.7977, + "step": 1361 + }, + { + "epoch": 0.28, + "learning_rate": 5.719384955394936e-06, + "loss": 0.84, + "step": 1362 + }, + { + "epoch": 0.28, + "learning_rate": 5.718969536769911e-06, + "loss": 0.8895, + "step": 1363 + }, + { + "epoch": 0.28, + "learning_rate": 5.718553825991802e-06, + "loss": 1.0354, + "step": 1364 + }, + { + "epoch": 0.28, + "learning_rate": 5.7181378231052794e-06, + "loss": 1.1423, + "step": 1365 + }, + { + "epoch": 0.28, + "learning_rate": 5.717721528155042e-06, + "loss": 0.973, + "step": 1366 + }, + { + "epoch": 0.28, + "learning_rate": 5.717304941185821e-06, + "loss": 0.9532, + "step": 1367 + }, + { + "epoch": 0.28, + "learning_rate": 5.716888062242376e-06, + "loss": 1.0556, + "step": 1368 + }, + { + "epoch": 0.28, + "learning_rate": 5.7164708913695035e-06, + "loss": 0.7945, + "step": 1369 + }, + { + "epoch": 0.28, + "learning_rate": 5.716053428612027e-06, + "loss": 1.151, + "step": 1370 + }, + { + "epoch": 0.29, + "learning_rate": 5.715635674014804e-06, + "loss": 1.0896, + "step": 1371 + }, + { + "epoch": 0.29, + "learning_rate": 5.715217627622722e-06, + "loss": 0.8814, + "step": 1372 + }, + { + "epoch": 0.29, + "learning_rate": 5.7147992894807e-06, + "loss": 0.913, + "step": 1373 + }, + { + "epoch": 0.29, + "learning_rate": 5.7143806596336875e-06, + "loss": 0.9334, + "step": 1374 + }, + { + "epoch": 0.29, + "learning_rate": 5.713961738126666e-06, + "loss": 0.8147, + "step": 1375 + }, + { + "epoch": 0.29, + "learning_rate": 5.71354252500465e-06, + "loss": 0.8078, + "step": 1376 + }, + { + "epoch": 0.29, + "learning_rate": 5.7131230203126835e-06, + "loss": 0.932, + "step": 1377 + }, + { + "epoch": 0.29, + "learning_rate": 5.712703224095841e-06, + "loss": 1.1714, + "step": 1378 + }, + { + "epoch": 0.29, + "learning_rate": 5.712283136399233e-06, + "loss": 0.9776, + "step": 1379 + }, + { + "epoch": 0.29, + "learning_rate": 5.711862757267993e-06, + "loss": 0.8477, + "step": 1380 + }, + { + "epoch": 0.29, + "learning_rate": 5.711442086747295e-06, + "loss": 1.2616, + "step": 1381 + }, + { + "epoch": 0.29, + "learning_rate": 5.711021124882338e-06, + "loss": 0.9116, + "step": 1382 + }, + { + "epoch": 0.29, + "learning_rate": 5.710599871718354e-06, + "loss": 0.9108, + "step": 1383 + }, + { + "epoch": 0.29, + "learning_rate": 5.7101783273006076e-06, + "loss": 0.8877, + "step": 1384 + }, + { + "epoch": 0.29, + "learning_rate": 5.709756491674392e-06, + "loss": 1.0946, + "step": 1385 + }, + { + "epoch": 0.29, + "learning_rate": 5.709334364885037e-06, + "loss": 1.0094, + "step": 1386 + }, + { + "epoch": 0.29, + "learning_rate": 5.708911946977897e-06, + "loss": 0.8305, + "step": 1387 + }, + { + "epoch": 0.29, + "learning_rate": 5.708489237998361e-06, + "loss": 0.7183, + "step": 1388 + }, + { + "epoch": 0.29, + "learning_rate": 5.70806623799185e-06, + "loss": 1.0361, + "step": 1389 + }, + { + "epoch": 0.29, + "learning_rate": 5.707642947003815e-06, + "loss": 0.915, + "step": 1390 + }, + { + "epoch": 0.29, + "learning_rate": 5.707219365079738e-06, + "loss": 1.1871, + "step": 1391 + }, + { + "epoch": 0.29, + "learning_rate": 5.7067954922651335e-06, + "loss": 0.7822, + "step": 1392 + }, + { + "epoch": 0.29, + "learning_rate": 5.7063713286055475e-06, + "loss": 1.109, + "step": 1393 + }, + { + "epoch": 0.29, + "learning_rate": 5.705946874146555e-06, + "loss": 0.9688, + "step": 1394 + }, + { + "epoch": 0.29, + "learning_rate": 5.705522128933764e-06, + "loss": 0.8815, + "step": 1395 + }, + { + "epoch": 0.29, + "learning_rate": 5.705097093012813e-06, + "loss": 1.0187, + "step": 1396 + }, + { + "epoch": 0.29, + "learning_rate": 5.704671766429372e-06, + "loss": 0.9128, + "step": 1397 + }, + { + "epoch": 0.29, + "learning_rate": 5.704246149229143e-06, + "loss": 1.0993, + "step": 1398 + }, + { + "epoch": 0.29, + "learning_rate": 5.703820241457859e-06, + "loss": 0.8005, + "step": 1399 + }, + { + "epoch": 0.29, + "learning_rate": 5.703394043161282e-06, + "loss": 0.9795, + "step": 1400 + }, + { + "epoch": 0.29, + "learning_rate": 5.702967554385209e-06, + "loss": 0.922, + "step": 1401 + }, + { + "epoch": 0.29, + "learning_rate": 5.702540775175464e-06, + "loss": 1.0006, + "step": 1402 + }, + { + "epoch": 0.29, + "learning_rate": 5.702113705577906e-06, + "loss": 0.8622, + "step": 1403 + }, + { + "epoch": 0.29, + "learning_rate": 5.701686345638423e-06, + "loss": 1.0162, + "step": 1404 + }, + { + "epoch": 0.29, + "learning_rate": 5.701258695402935e-06, + "loss": 1.2302, + "step": 1405 + }, + { + "epoch": 0.29, + "learning_rate": 5.7008307549173935e-06, + "loss": 0.8502, + "step": 1406 + }, + { + "epoch": 0.29, + "learning_rate": 5.70040252422778e-06, + "loss": 0.9586, + "step": 1407 + }, + { + "epoch": 0.29, + "learning_rate": 5.699974003380107e-06, + "loss": 1.107, + "step": 1408 + }, + { + "epoch": 0.29, + "learning_rate": 5.699545192420421e-06, + "loss": 0.8121, + "step": 1409 + }, + { + "epoch": 0.29, + "learning_rate": 5.699116091394796e-06, + "loss": 1.0965, + "step": 1410 + }, + { + "epoch": 0.29, + "learning_rate": 5.69868670034934e-06, + "loss": 1.1325, + "step": 1411 + }, + { + "epoch": 0.29, + "learning_rate": 5.6982570193301906e-06, + "loss": 0.9149, + "step": 1412 + }, + { + "epoch": 0.29, + "learning_rate": 5.697827048383517e-06, + "loss": 0.9861, + "step": 1413 + }, + { + "epoch": 0.29, + "learning_rate": 5.697396787555521e-06, + "loss": 0.9495, + "step": 1414 + }, + { + "epoch": 0.29, + "learning_rate": 5.69696623689243e-06, + "loss": 0.8327, + "step": 1415 + }, + { + "epoch": 0.29, + "learning_rate": 5.696535396440511e-06, + "loss": 0.8489, + "step": 1416 + }, + { + "epoch": 0.29, + "learning_rate": 5.696104266246055e-06, + "loss": 1.0336, + "step": 1417 + }, + { + "epoch": 0.29, + "learning_rate": 5.695672846355389e-06, + "loss": 0.9696, + "step": 1418 + }, + { + "epoch": 0.3, + "learning_rate": 5.695241136814868e-06, + "loss": 0.6807, + "step": 1419 + }, + { + "epoch": 0.3, + "learning_rate": 5.6948091376708786e-06, + "loss": 1.1509, + "step": 1420 + }, + { + "epoch": 0.3, + "learning_rate": 5.69437684896984e-06, + "loss": 0.8675, + "step": 1421 + }, + { + "epoch": 0.3, + "learning_rate": 5.693944270758201e-06, + "loss": 0.8921, + "step": 1422 + }, + { + "epoch": 0.3, + "learning_rate": 5.693511403082442e-06, + "loss": 0.8315, + "step": 1423 + }, + { + "epoch": 0.3, + "learning_rate": 5.693078245989076e-06, + "loss": 1.0159, + "step": 1424 + }, + { + "epoch": 0.3, + "learning_rate": 5.692644799524644e-06, + "loss": 0.8898, + "step": 1425 + }, + { + "epoch": 0.3, + "learning_rate": 5.6922110637357204e-06, + "loss": 1.0466, + "step": 1426 + }, + { + "epoch": 0.3, + "learning_rate": 5.69177703866891e-06, + "loss": 0.7908, + "step": 1427 + }, + { + "epoch": 0.3, + "learning_rate": 5.691342724370848e-06, + "loss": 1.2833, + "step": 1428 + }, + { + "epoch": 0.3, + "learning_rate": 5.6909081208882045e-06, + "loss": 0.9524, + "step": 1429 + }, + { + "epoch": 0.3, + "learning_rate": 5.690473228267674e-06, + "loss": 0.896, + "step": 1430 + }, + { + "epoch": 0.3, + "learning_rate": 5.690038046555987e-06, + "loss": 0.7239, + "step": 1431 + }, + { + "epoch": 0.3, + "learning_rate": 5.689602575799904e-06, + "loss": 0.9884, + "step": 1432 + }, + { + "epoch": 0.3, + "learning_rate": 5.689166816046217e-06, + "loss": 1.0005, + "step": 1433 + }, + { + "epoch": 0.3, + "learning_rate": 5.688730767341747e-06, + "loss": 0.9108, + "step": 1434 + }, + { + "epoch": 0.3, + "learning_rate": 5.688294429733347e-06, + "loss": 0.9662, + "step": 1435 + }, + { + "epoch": 0.3, + "learning_rate": 5.687857803267904e-06, + "loss": 1.0749, + "step": 1436 + }, + { + "epoch": 0.3, + "learning_rate": 5.6874208879923305e-06, + "loss": 0.9167, + "step": 1437 + }, + { + "epoch": 0.3, + "learning_rate": 5.6869836839535754e-06, + "loss": 1.001, + "step": 1438 + }, + { + "epoch": 0.3, + "learning_rate": 5.686546191198615e-06, + "loss": 1.0438, + "step": 1439 + }, + { + "epoch": 0.3, + "learning_rate": 5.6861084097744574e-06, + "loss": 1.1308, + "step": 1440 + }, + { + "epoch": 0.3, + "learning_rate": 5.685670339728143e-06, + "loss": 1.0159, + "step": 1441 + }, + { + "epoch": 0.3, + "learning_rate": 5.685231981106742e-06, + "loss": 0.9382, + "step": 1442 + }, + { + "epoch": 0.3, + "learning_rate": 5.684793333957357e-06, + "loss": 1.0651, + "step": 1443 + }, + { + "epoch": 0.3, + "learning_rate": 5.68435439832712e-06, + "loss": 0.9745, + "step": 1444 + }, + { + "epoch": 0.3, + "learning_rate": 5.6839151742631936e-06, + "loss": 0.937, + "step": 1445 + }, + { + "epoch": 0.3, + "learning_rate": 5.6834756618127734e-06, + "loss": 1.0221, + "step": 1446 + }, + { + "epoch": 0.3, + "learning_rate": 5.683035861023085e-06, + "loss": 1.2318, + "step": 1447 + }, + { + "epoch": 0.3, + "learning_rate": 5.682595771941385e-06, + "loss": 1.256, + "step": 1448 + }, + { + "epoch": 0.3, + "learning_rate": 5.6821553946149605e-06, + "loss": 0.9983, + "step": 1449 + }, + { + "epoch": 0.3, + "learning_rate": 5.68171472909113e-06, + "loss": 1.0316, + "step": 1450 + }, + { + "epoch": 0.3, + "learning_rate": 5.681273775417244e-06, + "loss": 0.9687, + "step": 1451 + }, + { + "epoch": 0.3, + "learning_rate": 5.680832533640682e-06, + "loss": 1.1146, + "step": 1452 + }, + { + "epoch": 0.3, + "learning_rate": 5.680391003808855e-06, + "loss": 0.9232, + "step": 1453 + }, + { + "epoch": 0.3, + "learning_rate": 5.679949185969206e-06, + "loss": 0.9343, + "step": 1454 + }, + { + "epoch": 0.3, + "learning_rate": 5.679507080169208e-06, + "loss": 0.9131, + "step": 1455 + }, + { + "epoch": 0.3, + "learning_rate": 5.6790646864563665e-06, + "loss": 1.0548, + "step": 1456 + }, + { + "epoch": 0.3, + "learning_rate": 5.6786220048782155e-06, + "loss": 1.089, + "step": 1457 + }, + { + "epoch": 0.3, + "learning_rate": 5.67817903548232e-06, + "loss": 0.7688, + "step": 1458 + }, + { + "epoch": 0.3, + "learning_rate": 5.6777357783162786e-06, + "loss": 0.8813, + "step": 1459 + }, + { + "epoch": 0.3, + "learning_rate": 5.677292233427719e-06, + "loss": 1.0575, + "step": 1460 + }, + { + "epoch": 0.3, + "learning_rate": 5.6768484008643e-06, + "loss": 0.8079, + "step": 1461 + }, + { + "epoch": 0.3, + "learning_rate": 5.676404280673712e-06, + "loss": 0.9165, + "step": 1462 + }, + { + "epoch": 0.3, + "learning_rate": 5.6759598729036745e-06, + "loss": 0.9849, + "step": 1463 + }, + { + "epoch": 0.3, + "learning_rate": 5.67551517760194e-06, + "loss": 0.8789, + "step": 1464 + }, + { + "epoch": 0.3, + "learning_rate": 5.675070194816289e-06, + "loss": 1.0595, + "step": 1465 + }, + { + "epoch": 0.3, + "learning_rate": 5.674624924594538e-06, + "loss": 0.9151, + "step": 1466 + }, + { + "epoch": 0.31, + "learning_rate": 5.674179366984529e-06, + "loss": 0.8268, + "step": 1467 + }, + { + "epoch": 0.31, + "learning_rate": 5.673733522034139e-06, + "loss": 1.0386, + "step": 1468 + }, + { + "epoch": 0.31, + "learning_rate": 5.673287389791271e-06, + "loss": 1.1315, + "step": 1469 + }, + { + "epoch": 0.31, + "learning_rate": 5.672840970303866e-06, + "loss": 0.7857, + "step": 1470 + }, + { + "epoch": 0.31, + "learning_rate": 5.672394263619888e-06, + "loss": 0.9271, + "step": 1471 + }, + { + "epoch": 0.31, + "learning_rate": 5.6719472697873376e-06, + "loss": 0.9606, + "step": 1472 + }, + { + "epoch": 0.31, + "learning_rate": 5.671499988854243e-06, + "loss": 1.0088, + "step": 1473 + }, + { + "epoch": 0.31, + "learning_rate": 5.671052420868667e-06, + "loss": 1.0728, + "step": 1474 + }, + { + "epoch": 0.31, + "learning_rate": 5.670604565878697e-06, + "loss": 0.9808, + "step": 1475 + }, + { + "epoch": 0.31, + "learning_rate": 5.670156423932457e-06, + "loss": 1.0131, + "step": 1476 + }, + { + "epoch": 0.31, + "learning_rate": 5.669707995078101e-06, + "loss": 0.813, + "step": 1477 + }, + { + "epoch": 0.31, + "learning_rate": 5.66925927936381e-06, + "loss": 1.1129, + "step": 1478 + }, + { + "epoch": 0.31, + "learning_rate": 5.668810276837801e-06, + "loss": 1.2084, + "step": 1479 + }, + { + "epoch": 0.31, + "learning_rate": 5.668360987548318e-06, + "loss": 1.0459, + "step": 1480 + }, + { + "epoch": 0.31, + "learning_rate": 5.667911411543637e-06, + "loss": 0.7099, + "step": 1481 + }, + { + "epoch": 0.31, + "learning_rate": 5.667461548872066e-06, + "loss": 1.1324, + "step": 1482 + }, + { + "epoch": 0.31, + "learning_rate": 5.667011399581941e-06, + "loss": 0.9818, + "step": 1483 + }, + { + "epoch": 0.31, + "learning_rate": 5.666560963721632e-06, + "loss": 0.9237, + "step": 1484 + }, + { + "epoch": 0.31, + "learning_rate": 5.666110241339538e-06, + "loss": 0.8925, + "step": 1485 + }, + { + "epoch": 0.31, + "learning_rate": 5.665659232484088e-06, + "loss": 0.9249, + "step": 1486 + }, + { + "epoch": 0.31, + "learning_rate": 5.665207937203744e-06, + "loss": 0.9443, + "step": 1487 + }, + { + "epoch": 0.31, + "learning_rate": 5.6647563555469975e-06, + "loss": 0.971, + "step": 1488 + }, + { + "epoch": 0.31, + "learning_rate": 5.664304487562371e-06, + "loss": 0.9653, + "step": 1489 + }, + { + "epoch": 0.31, + "learning_rate": 5.663852333298417e-06, + "loss": 0.7548, + "step": 1490 + }, + { + "epoch": 0.31, + "learning_rate": 5.66339989280372e-06, + "loss": 0.8874, + "step": 1491 + }, + { + "epoch": 0.31, + "learning_rate": 5.662947166126894e-06, + "loss": 1.0038, + "step": 1492 + }, + { + "epoch": 0.31, + "learning_rate": 5.662494153316586e-06, + "loss": 1.1689, + "step": 1493 + }, + { + "epoch": 0.31, + "learning_rate": 5.66204085442147e-06, + "loss": 1.0142, + "step": 1494 + }, + { + "epoch": 0.31, + "learning_rate": 5.6615872694902555e-06, + "loss": 1.1814, + "step": 1495 + }, + { + "epoch": 0.31, + "learning_rate": 5.661133398571679e-06, + "loss": 0.8549, + "step": 1496 + }, + { + "epoch": 0.31, + "learning_rate": 5.660679241714507e-06, + "loss": 1.0472, + "step": 1497 + }, + { + "epoch": 0.31, + "learning_rate": 5.660224798967541e-06, + "loss": 0.8002, + "step": 1498 + }, + { + "epoch": 0.31, + "learning_rate": 5.659770070379611e-06, + "loss": 1.1598, + "step": 1499 + }, + { + "epoch": 0.31, + "learning_rate": 5.659315055999576e-06, + "loss": 0.9095, + "step": 1500 + }, + { + "epoch": 0.31, + "learning_rate": 5.658859755876329e-06, + "loss": 1.0044, + "step": 1501 + }, + { + "epoch": 0.31, + "learning_rate": 5.65840417005879e-06, + "loss": 0.9663, + "step": 1502 + }, + { + "epoch": 0.31, + "learning_rate": 5.6579482985959115e-06, + "loss": 0.9543, + "step": 1503 + }, + { + "epoch": 0.31, + "learning_rate": 5.65749214153668e-06, + "loss": 1.0791, + "step": 1504 + }, + { + "epoch": 0.31, + "learning_rate": 5.657035698930107e-06, + "loss": 1.134, + "step": 1505 + }, + { + "epoch": 0.31, + "learning_rate": 5.656578970825238e-06, + "loss": 1.0719, + "step": 1506 + }, + { + "epoch": 0.31, + "learning_rate": 5.656121957271147e-06, + "loss": 0.8976, + "step": 1507 + }, + { + "epoch": 0.31, + "learning_rate": 5.655664658316942e-06, + "loss": 1.1163, + "step": 1508 + }, + { + "epoch": 0.31, + "learning_rate": 5.655207074011758e-06, + "loss": 0.8294, + "step": 1509 + }, + { + "epoch": 0.31, + "learning_rate": 5.654749204404765e-06, + "loss": 1.0142, + "step": 1510 + }, + { + "epoch": 0.31, + "learning_rate": 5.654291049545159e-06, + "loss": 0.9537, + "step": 1511 + }, + { + "epoch": 0.31, + "learning_rate": 5.653832609482169e-06, + "loss": 0.7829, + "step": 1512 + }, + { + "epoch": 0.31, + "learning_rate": 5.653373884265054e-06, + "loss": 0.936, + "step": 1513 + }, + { + "epoch": 0.31, + "learning_rate": 5.652914873943105e-06, + "loss": 0.7713, + "step": 1514 + }, + { + "epoch": 0.32, + "learning_rate": 5.652455578565642e-06, + "loss": 0.9055, + "step": 1515 + }, + { + "epoch": 0.32, + "learning_rate": 5.651995998182016e-06, + "loss": 0.9663, + "step": 1516 + }, + { + "epoch": 0.32, + "learning_rate": 5.65153613284161e-06, + "loss": 0.9383, + "step": 1517 + }, + { + "epoch": 0.32, + "learning_rate": 5.651075982593835e-06, + "loss": 0.9123, + "step": 1518 + }, + { + "epoch": 0.32, + "learning_rate": 5.650615547488135e-06, + "loss": 0.8806, + "step": 1519 + }, + { + "epoch": 0.32, + "learning_rate": 5.650154827573983e-06, + "loss": 0.9273, + "step": 1520 + }, + { + "epoch": 0.32, + "learning_rate": 5.649693822900885e-06, + "loss": 0.7591, + "step": 1521 + }, + { + "epoch": 0.32, + "learning_rate": 5.649232533518374e-06, + "loss": 0.9271, + "step": 1522 + }, + { + "epoch": 0.32, + "learning_rate": 5.6487709594760175e-06, + "loss": 0.9739, + "step": 1523 + }, + { + "epoch": 0.32, + "learning_rate": 5.648309100823409e-06, + "loss": 0.9957, + "step": 1524 + }, + { + "epoch": 0.32, + "learning_rate": 5.6478469576101775e-06, + "loss": 0.9038, + "step": 1525 + }, + { + "epoch": 0.32, + "learning_rate": 5.647384529885978e-06, + "loss": 0.8403, + "step": 1526 + }, + { + "epoch": 0.32, + "learning_rate": 5.646921817700501e-06, + "loss": 0.9371, + "step": 1527 + }, + { + "epoch": 0.32, + "learning_rate": 5.646458821103463e-06, + "loss": 0.9699, + "step": 1528 + }, + { + "epoch": 0.32, + "learning_rate": 5.645995540144614e-06, + "loss": 0.8837, + "step": 1529 + }, + { + "epoch": 0.32, + "learning_rate": 5.645531974873733e-06, + "loss": 0.8479, + "step": 1530 + }, + { + "epoch": 0.32, + "learning_rate": 5.64506812534063e-06, + "loss": 0.8054, + "step": 1531 + }, + { + "epoch": 0.32, + "learning_rate": 5.6446039915951456e-06, + "loss": 1.014, + "step": 1532 + }, + { + "epoch": 0.32, + "learning_rate": 5.644139573687151e-06, + "loss": 1.0224, + "step": 1533 + }, + { + "epoch": 0.32, + "learning_rate": 5.643674871666547e-06, + "loss": 0.9086, + "step": 1534 + }, + { + "epoch": 0.32, + "learning_rate": 5.643209885583268e-06, + "loss": 0.8984, + "step": 1535 + }, + { + "epoch": 0.32, + "learning_rate": 5.642744615487275e-06, + "loss": 0.9008, + "step": 1536 + }, + { + "epoch": 0.32, + "learning_rate": 5.642279061428563e-06, + "loss": 1.0966, + "step": 1537 + }, + { + "epoch": 0.32, + "learning_rate": 5.6418132234571514e-06, + "loss": 0.9674, + "step": 1538 + }, + { + "epoch": 0.32, + "learning_rate": 5.6413471016231e-06, + "loss": 0.9553, + "step": 1539 + }, + { + "epoch": 0.32, + "learning_rate": 5.64088069597649e-06, + "loss": 0.9807, + "step": 1540 + }, + { + "epoch": 0.32, + "learning_rate": 5.640414006567438e-06, + "loss": 0.8946, + "step": 1541 + }, + { + "epoch": 0.32, + "learning_rate": 5.639947033446088e-06, + "loss": 0.8217, + "step": 1542 + }, + { + "epoch": 0.32, + "learning_rate": 5.639479776662619e-06, + "loss": 1.1152, + "step": 1543 + }, + { + "epoch": 0.32, + "learning_rate": 5.639012236267237e-06, + "loss": 1.1307, + "step": 1544 + }, + { + "epoch": 0.32, + "learning_rate": 5.638544412310177e-06, + "loss": 1.0581, + "step": 1545 + }, + { + "epoch": 0.32, + "learning_rate": 5.638076304841709e-06, + "loss": 0.8373, + "step": 1546 + }, + { + "epoch": 0.32, + "learning_rate": 5.637607913912129e-06, + "loss": 0.8884, + "step": 1547 + }, + { + "epoch": 0.32, + "learning_rate": 5.637139239571769e-06, + "loss": 0.9577, + "step": 1548 + }, + { + "epoch": 0.32, + "learning_rate": 5.636670281870984e-06, + "loss": 0.9501, + "step": 1549 + }, + { + "epoch": 0.32, + "learning_rate": 5.636201040860168e-06, + "loss": 1.0621, + "step": 1550 + }, + { + "epoch": 0.32, + "learning_rate": 5.635731516589735e-06, + "loss": 0.8094, + "step": 1551 + }, + { + "epoch": 0.32, + "learning_rate": 5.6352617091101405e-06, + "loss": 1.3197, + "step": 1552 + }, + { + "epoch": 0.32, + "learning_rate": 5.634791618471863e-06, + "loss": 0.9209, + "step": 1553 + }, + { + "epoch": 0.32, + "learning_rate": 5.634321244725414e-06, + "loss": 0.8965, + "step": 1554 + }, + { + "epoch": 0.32, + "learning_rate": 5.633850587921336e-06, + "loss": 0.899, + "step": 1555 + }, + { + "epoch": 0.32, + "learning_rate": 5.6333796481102e-06, + "loss": 1.0753, + "step": 1556 + }, + { + "epoch": 0.32, + "learning_rate": 5.632908425342608e-06, + "loss": 0.9815, + "step": 1557 + }, + { + "epoch": 0.32, + "learning_rate": 5.632436919669194e-06, + "loss": 0.7991, + "step": 1558 + }, + { + "epoch": 0.32, + "learning_rate": 5.6319651311406205e-06, + "loss": 0.9746, + "step": 1559 + }, + { + "epoch": 0.32, + "learning_rate": 5.631493059807582e-06, + "loss": 0.8728, + "step": 1560 + }, + { + "epoch": 0.32, + "learning_rate": 5.631020705720802e-06, + "loss": 1.0696, + "step": 1561 + }, + { + "epoch": 0.32, + "learning_rate": 5.630548068931034e-06, + "loss": 0.9315, + "step": 1562 + }, + { + "epoch": 0.33, + "learning_rate": 5.630075149489064e-06, + "loss": 0.966, + "step": 1563 + }, + { + "epoch": 0.33, + "learning_rate": 5.6296019474457086e-06, + "loss": 1.0017, + "step": 1564 + }, + { + "epoch": 0.33, + "learning_rate": 5.6291284628518095e-06, + "loss": 1.1794, + "step": 1565 + }, + { + "epoch": 0.33, + "learning_rate": 5.628654695758247e-06, + "loss": 0.8329, + "step": 1566 + }, + { + "epoch": 0.33, + "learning_rate": 5.628180646215922e-06, + "loss": 0.995, + "step": 1567 + }, + { + "epoch": 0.33, + "learning_rate": 5.627706314275776e-06, + "loss": 0.8779, + "step": 1568 + }, + { + "epoch": 0.33, + "learning_rate": 5.627231699988774e-06, + "loss": 1.0955, + "step": 1569 + }, + { + "epoch": 0.33, + "learning_rate": 5.626756803405913e-06, + "loss": 0.9331, + "step": 1570 + }, + { + "epoch": 0.33, + "learning_rate": 5.62628162457822e-06, + "loss": 0.8337, + "step": 1571 + }, + { + "epoch": 0.33, + "learning_rate": 5.625806163556755e-06, + "loss": 1.0829, + "step": 1572 + }, + { + "epoch": 0.33, + "learning_rate": 5.625330420392604e-06, + "loss": 0.8867, + "step": 1573 + }, + { + "epoch": 0.33, + "learning_rate": 5.624854395136888e-06, + "loss": 0.8789, + "step": 1574 + }, + { + "epoch": 0.33, + "learning_rate": 5.6243780878407544e-06, + "loss": 1.1032, + "step": 1575 + }, + { + "epoch": 0.33, + "learning_rate": 5.623901498555382e-06, + "loss": 1.0999, + "step": 1576 + }, + { + "epoch": 0.33, + "learning_rate": 5.623424627331981e-06, + "loss": 0.989, + "step": 1577 + }, + { + "epoch": 0.33, + "learning_rate": 5.6229474742217905e-06, + "loss": 1.158, + "step": 1578 + }, + { + "epoch": 0.33, + "learning_rate": 5.622470039276082e-06, + "loss": 0.8066, + "step": 1579 + }, + { + "epoch": 0.33, + "learning_rate": 5.621992322546155e-06, + "loss": 0.9877, + "step": 1580 + }, + { + "epoch": 0.33, + "learning_rate": 5.621514324083339e-06, + "loss": 0.8868, + "step": 1581 + }, + { + "epoch": 0.33, + "learning_rate": 5.621036043938996e-06, + "loss": 0.8427, + "step": 1582 + }, + { + "epoch": 0.33, + "learning_rate": 5.620557482164517e-06, + "loss": 0.9638, + "step": 1583 + }, + { + "epoch": 0.33, + "learning_rate": 5.620078638811325e-06, + "loss": 0.7697, + "step": 1584 + }, + { + "epoch": 0.33, + "learning_rate": 5.6195995139308695e-06, + "loss": 0.8459, + "step": 1585 + }, + { + "epoch": 0.33, + "learning_rate": 5.619120107574632e-06, + "loss": 1.1176, + "step": 1586 + }, + { + "epoch": 0.33, + "learning_rate": 5.618640419794127e-06, + "loss": 0.8746, + "step": 1587 + }, + { + "epoch": 0.33, + "learning_rate": 5.618160450640896e-06, + "loss": 0.8292, + "step": 1588 + }, + { + "epoch": 0.33, + "learning_rate": 5.61768020016651e-06, + "loss": 1.0855, + "step": 1589 + }, + { + "epoch": 0.33, + "learning_rate": 5.617199668422575e-06, + "loss": 1.1057, + "step": 1590 + }, + { + "epoch": 0.33, + "learning_rate": 5.616718855460721e-06, + "loss": 1.0926, + "step": 1591 + }, + { + "epoch": 0.33, + "learning_rate": 5.6162377613326135e-06, + "loss": 0.8591, + "step": 1592 + }, + { + "epoch": 0.33, + "learning_rate": 5.615756386089946e-06, + "loss": 0.8661, + "step": 1593 + }, + { + "epoch": 0.33, + "learning_rate": 5.61527472978444e-06, + "loss": 1.0596, + "step": 1594 + }, + { + "epoch": 0.33, + "learning_rate": 5.614792792467851e-06, + "loss": 1.0261, + "step": 1595 + }, + { + "epoch": 0.33, + "learning_rate": 5.614310574191964e-06, + "loss": 1.0694, + "step": 1596 + }, + { + "epoch": 0.33, + "learning_rate": 5.613828075008591e-06, + "loss": 0.7104, + "step": 1597 + }, + { + "epoch": 0.33, + "learning_rate": 5.6133452949695794e-06, + "loss": 1.0741, + "step": 1598 + }, + { + "epoch": 0.33, + "learning_rate": 5.612862234126802e-06, + "loss": 0.8046, + "step": 1599 + }, + { + "epoch": 0.33, + "learning_rate": 5.612378892532163e-06, + "loss": 1.1926, + "step": 1600 + }, + { + "epoch": 0.33, + "learning_rate": 5.611895270237599e-06, + "loss": 0.9305, + "step": 1601 + }, + { + "epoch": 0.33, + "learning_rate": 5.6114113672950745e-06, + "loss": 0.9565, + "step": 1602 + }, + { + "epoch": 0.33, + "learning_rate": 5.610927183756585e-06, + "loss": 1.038, + "step": 1603 + }, + { + "epoch": 0.33, + "learning_rate": 5.610442719674156e-06, + "loss": 0.9743, + "step": 1604 + }, + { + "epoch": 0.33, + "learning_rate": 5.609957975099843e-06, + "loss": 0.7688, + "step": 1605 + }, + { + "epoch": 0.33, + "learning_rate": 5.609472950085732e-06, + "loss": 0.9502, + "step": 1606 + }, + { + "epoch": 0.33, + "learning_rate": 5.608987644683938e-06, + "loss": 1.1945, + "step": 1607 + }, + { + "epoch": 0.33, + "learning_rate": 5.6085020589466095e-06, + "loss": 0.8844, + "step": 1608 + }, + { + "epoch": 0.33, + "learning_rate": 5.608016192925919e-06, + "loss": 1.1184, + "step": 1609 + }, + { + "epoch": 0.33, + "learning_rate": 5.607530046674076e-06, + "loss": 1.1156, + "step": 1610 + }, + { + "epoch": 0.34, + "learning_rate": 5.6070436202433146e-06, + "loss": 0.8679, + "step": 1611 + }, + { + "epoch": 0.34, + "learning_rate": 5.606556913685902e-06, + "loss": 0.8645, + "step": 1612 + }, + { + "epoch": 0.34, + "learning_rate": 5.606069927054136e-06, + "loss": 0.9469, + "step": 1613 + }, + { + "epoch": 0.34, + "learning_rate": 5.605582660400342e-06, + "loss": 0.8942, + "step": 1614 + }, + { + "epoch": 0.34, + "learning_rate": 5.605095113776877e-06, + "loss": 0.9275, + "step": 1615 + }, + { + "epoch": 0.34, + "learning_rate": 5.604607287236128e-06, + "loss": 0.9162, + "step": 1616 + }, + { + "epoch": 0.34, + "learning_rate": 5.604119180830511e-06, + "loss": 0.8723, + "step": 1617 + }, + { + "epoch": 0.34, + "learning_rate": 5.603630794612475e-06, + "loss": 0.9831, + "step": 1618 + }, + { + "epoch": 0.34, + "learning_rate": 5.6031421286344956e-06, + "loss": 0.8881, + "step": 1619 + }, + { + "epoch": 0.34, + "learning_rate": 5.60265318294908e-06, + "loss": 0.883, + "step": 1620 + }, + { + "epoch": 0.34, + "learning_rate": 5.602163957608766e-06, + "loss": 1.02, + "step": 1621 + }, + { + "epoch": 0.34, + "learning_rate": 5.60167445266612e-06, + "loss": 0.9012, + "step": 1622 + }, + { + "epoch": 0.34, + "learning_rate": 5.60118466817374e-06, + "loss": 0.9556, + "step": 1623 + }, + { + "epoch": 0.34, + "learning_rate": 5.6006946041842525e-06, + "loss": 0.8123, + "step": 1624 + }, + { + "epoch": 0.34, + "learning_rate": 5.600204260750316e-06, + "loss": 0.9581, + "step": 1625 + }, + { + "epoch": 0.34, + "learning_rate": 5.599713637924617e-06, + "loss": 1.1791, + "step": 1626 + }, + { + "epoch": 0.34, + "learning_rate": 5.599222735759873e-06, + "loss": 1.0577, + "step": 1627 + }, + { + "epoch": 0.34, + "learning_rate": 5.598731554308833e-06, + "loss": 0.7785, + "step": 1628 + }, + { + "epoch": 0.34, + "learning_rate": 5.598240093624271e-06, + "loss": 1.005, + "step": 1629 + }, + { + "epoch": 0.34, + "learning_rate": 5.597748353758997e-06, + "loss": 0.8346, + "step": 1630 + }, + { + "epoch": 0.34, + "learning_rate": 5.5972563347658465e-06, + "loss": 1.2594, + "step": 1631 + }, + { + "epoch": 0.34, + "learning_rate": 5.59676403669769e-06, + "loss": 0.7873, + "step": 1632 + }, + { + "epoch": 0.34, + "learning_rate": 5.596271459607422e-06, + "loss": 0.8596, + "step": 1633 + }, + { + "epoch": 0.34, + "learning_rate": 5.595778603547972e-06, + "loss": 0.9309, + "step": 1634 + }, + { + "epoch": 0.34, + "learning_rate": 5.595285468572295e-06, + "loss": 0.8184, + "step": 1635 + }, + { + "epoch": 0.34, + "learning_rate": 5.59479205473338e-06, + "loss": 0.886, + "step": 1636 + }, + { + "epoch": 0.34, + "learning_rate": 5.594298362084245e-06, + "loss": 0.8726, + "step": 1637 + }, + { + "epoch": 0.34, + "learning_rate": 5.5938043906779344e-06, + "loss": 0.9745, + "step": 1638 + }, + { + "epoch": 0.34, + "learning_rate": 5.593310140567528e-06, + "loss": 0.9615, + "step": 1639 + }, + { + "epoch": 0.34, + "learning_rate": 5.592815611806132e-06, + "loss": 1.0554, + "step": 1640 + }, + { + "epoch": 0.34, + "learning_rate": 5.592320804446884e-06, + "loss": 0.7705, + "step": 1641 + }, + { + "epoch": 0.34, + "learning_rate": 5.59182571854295e-06, + "loss": 0.8278, + "step": 1642 + }, + { + "epoch": 0.34, + "learning_rate": 5.591330354147527e-06, + "loss": 1.066, + "step": 1643 + }, + { + "epoch": 0.34, + "learning_rate": 5.590834711313843e-06, + "loss": 0.9443, + "step": 1644 + }, + { + "epoch": 0.34, + "learning_rate": 5.590338790095154e-06, + "loss": 0.9332, + "step": 1645 + }, + { + "epoch": 0.34, + "learning_rate": 5.589842590544747e-06, + "loss": 0.9522, + "step": 1646 + }, + { + "epoch": 0.34, + "learning_rate": 5.589346112715938e-06, + "loss": 0.9132, + "step": 1647 + }, + { + "epoch": 0.34, + "learning_rate": 5.5888493566620745e-06, + "loss": 1.2172, + "step": 1648 + }, + { + "epoch": 0.34, + "learning_rate": 5.588352322436532e-06, + "loss": 0.864, + "step": 1649 + }, + { + "epoch": 0.34, + "learning_rate": 5.5878550100927175e-06, + "loss": 0.8093, + "step": 1650 + }, + { + "epoch": 0.34, + "learning_rate": 5.587357419684067e-06, + "loss": 0.9831, + "step": 1651 + }, + { + "epoch": 0.34, + "learning_rate": 5.586859551264046e-06, + "loss": 0.8647, + "step": 1652 + }, + { + "epoch": 0.34, + "learning_rate": 5.58636140488615e-06, + "loss": 0.9444, + "step": 1653 + }, + { + "epoch": 0.34, + "learning_rate": 5.5858629806039075e-06, + "loss": 0.8881, + "step": 1654 + }, + { + "epoch": 0.34, + "learning_rate": 5.585364278470871e-06, + "loss": 0.8805, + "step": 1655 + }, + { + "epoch": 0.34, + "learning_rate": 5.584865298540627e-06, + "loss": 0.9106, + "step": 1656 + }, + { + "epoch": 0.34, + "learning_rate": 5.584366040866792e-06, + "loss": 0.8013, + "step": 1657 + }, + { + "epoch": 0.34, + "learning_rate": 5.58386650550301e-06, + "loss": 1.0176, + "step": 1658 + }, + { + "epoch": 0.35, + "learning_rate": 5.583366692502956e-06, + "loss": 0.991, + "step": 1659 + }, + { + "epoch": 0.35, + "learning_rate": 5.582866601920336e-06, + "loss": 1.0193, + "step": 1660 + }, + { + "epoch": 0.35, + "learning_rate": 5.582366233808883e-06, + "loss": 1.0892, + "step": 1661 + }, + { + "epoch": 0.35, + "learning_rate": 5.581865588222363e-06, + "loss": 0.8265, + "step": 1662 + }, + { + "epoch": 0.35, + "learning_rate": 5.581364665214569e-06, + "loss": 1.102, + "step": 1663 + }, + { + "epoch": 0.35, + "learning_rate": 5.580863464839327e-06, + "loss": 0.9102, + "step": 1664 + }, + { + "epoch": 0.35, + "learning_rate": 5.580361987150489e-06, + "loss": 0.7991, + "step": 1665 + }, + { + "epoch": 0.35, + "learning_rate": 5.579860232201941e-06, + "loss": 0.6613, + "step": 1666 + }, + { + "epoch": 0.35, + "learning_rate": 5.579358200047593e-06, + "loss": 1.0032, + "step": 1667 + }, + { + "epoch": 0.35, + "learning_rate": 5.578855890741393e-06, + "loss": 1.1645, + "step": 1668 + }, + { + "epoch": 0.35, + "learning_rate": 5.57835330433731e-06, + "loss": 1.1878, + "step": 1669 + }, + { + "epoch": 0.35, + "learning_rate": 5.577850440889348e-06, + "loss": 0.967, + "step": 1670 + }, + { + "epoch": 0.35, + "learning_rate": 5.577347300451541e-06, + "loss": 0.9729, + "step": 1671 + }, + { + "epoch": 0.35, + "learning_rate": 5.5768438830779505e-06, + "loss": 1.1712, + "step": 1672 + }, + { + "epoch": 0.35, + "learning_rate": 5.576340188822669e-06, + "loss": 0.9076, + "step": 1673 + }, + { + "epoch": 0.35, + "learning_rate": 5.575836217739817e-06, + "loss": 0.9007, + "step": 1674 + }, + { + "epoch": 0.35, + "learning_rate": 5.575331969883548e-06, + "loss": 0.8823, + "step": 1675 + }, + { + "epoch": 0.35, + "learning_rate": 5.574827445308042e-06, + "loss": 1.0988, + "step": 1676 + }, + { + "epoch": 0.35, + "learning_rate": 5.574322644067511e-06, + "loss": 0.765, + "step": 1677 + }, + { + "epoch": 0.35, + "learning_rate": 5.573817566216195e-06, + "loss": 1.0129, + "step": 1678 + }, + { + "epoch": 0.35, + "learning_rate": 5.573312211808367e-06, + "loss": 0.9812, + "step": 1679 + }, + { + "epoch": 0.35, + "learning_rate": 5.572806580898323e-06, + "loss": 0.846, + "step": 1680 + }, + { + "epoch": 0.35, + "learning_rate": 5.572300673540397e-06, + "loss": 0.9062, + "step": 1681 + }, + { + "epoch": 0.35, + "learning_rate": 5.571794489788947e-06, + "loss": 0.9187, + "step": 1682 + }, + { + "epoch": 0.35, + "learning_rate": 5.571288029698362e-06, + "loss": 0.9705, + "step": 1683 + }, + { + "epoch": 0.35, + "learning_rate": 5.5707812933230625e-06, + "loss": 1.2218, + "step": 1684 + }, + { + "epoch": 0.35, + "learning_rate": 5.570274280717497e-06, + "loss": 0.9861, + "step": 1685 + }, + { + "epoch": 0.35, + "learning_rate": 5.569766991936142e-06, + "loss": 1.0415, + "step": 1686 + }, + { + "epoch": 0.35, + "learning_rate": 5.5692594270335074e-06, + "loss": 0.841, + "step": 1687 + }, + { + "epoch": 0.35, + "learning_rate": 5.568751586064132e-06, + "loss": 0.9141, + "step": 1688 + }, + { + "epoch": 0.35, + "learning_rate": 5.56824346908258e-06, + "loss": 1.0927, + "step": 1689 + }, + { + "epoch": 0.35, + "learning_rate": 5.567735076143452e-06, + "loss": 1.2106, + "step": 1690 + }, + { + "epoch": 0.35, + "learning_rate": 5.567226407301373e-06, + "loss": 1.1638, + "step": 1691 + }, + { + "epoch": 0.35, + "learning_rate": 5.566717462610999e-06, + "loss": 0.8556, + "step": 1692 + }, + { + "epoch": 0.35, + "learning_rate": 5.566208242127018e-06, + "loss": 0.7376, + "step": 1693 + }, + { + "epoch": 0.35, + "learning_rate": 5.565698745904143e-06, + "loss": 0.867, + "step": 1694 + }, + { + "epoch": 0.35, + "learning_rate": 5.565188973997122e-06, + "loss": 1.131, + "step": 1695 + }, + { + "epoch": 0.35, + "learning_rate": 5.564678926460729e-06, + "loss": 0.9388, + "step": 1696 + }, + { + "epoch": 0.35, + "learning_rate": 5.564168603349768e-06, + "loss": 1.0934, + "step": 1697 + }, + { + "epoch": 0.35, + "learning_rate": 5.563658004719073e-06, + "loss": 1.0889, + "step": 1698 + }, + { + "epoch": 0.35, + "learning_rate": 5.563147130623509e-06, + "loss": 1.3718, + "step": 1699 + }, + { + "epoch": 0.35, + "learning_rate": 5.562635981117968e-06, + "loss": 0.8334, + "step": 1700 + }, + { + "epoch": 0.35, + "learning_rate": 5.562124556257374e-06, + "loss": 1.0896, + "step": 1701 + }, + { + "epoch": 0.35, + "learning_rate": 5.56161285609668e-06, + "loss": 0.8314, + "step": 1702 + }, + { + "epoch": 0.35, + "learning_rate": 5.561100880690866e-06, + "loss": 0.9614, + "step": 1703 + }, + { + "epoch": 0.35, + "learning_rate": 5.560588630094946e-06, + "loss": 1.0356, + "step": 1704 + }, + { + "epoch": 0.35, + "learning_rate": 5.560076104363961e-06, + "loss": 1.044, + "step": 1705 + }, + { + "epoch": 0.35, + "learning_rate": 5.559563303552979e-06, + "loss": 1.0273, + "step": 1706 + }, + { + "epoch": 0.36, + "learning_rate": 5.559050227717105e-06, + "loss": 1.0569, + "step": 1707 + }, + { + "epoch": 0.36, + "learning_rate": 5.558536876911466e-06, + "loss": 0.9424, + "step": 1708 + }, + { + "epoch": 0.36, + "learning_rate": 5.5580232511912225e-06, + "loss": 0.9129, + "step": 1709 + }, + { + "epoch": 0.36, + "learning_rate": 5.557509350611563e-06, + "loss": 0.8904, + "step": 1710 + }, + { + "epoch": 0.36, + "learning_rate": 5.556995175227706e-06, + "loss": 0.9675, + "step": 1711 + }, + { + "epoch": 0.36, + "learning_rate": 5.556480725094899e-06, + "loss": 0.942, + "step": 1712 + }, + { + "epoch": 0.36, + "learning_rate": 5.5559660002684215e-06, + "loss": 0.8855, + "step": 1713 + }, + { + "epoch": 0.36, + "learning_rate": 5.55545100080358e-06, + "loss": 1.0582, + "step": 1714 + }, + { + "epoch": 0.36, + "learning_rate": 5.55493572675571e-06, + "loss": 0.9462, + "step": 1715 + }, + { + "epoch": 0.36, + "learning_rate": 5.554420178180179e-06, + "loss": 0.9992, + "step": 1716 + }, + { + "epoch": 0.36, + "learning_rate": 5.553904355132382e-06, + "loss": 0.9539, + "step": 1717 + }, + { + "epoch": 0.36, + "learning_rate": 5.553388257667744e-06, + "loss": 0.8386, + "step": 1718 + }, + { + "epoch": 0.36, + "learning_rate": 5.5528718858417206e-06, + "loss": 0.7376, + "step": 1719 + }, + { + "epoch": 0.36, + "learning_rate": 5.552355239709795e-06, + "loss": 1.2237, + "step": 1720 + }, + { + "epoch": 0.36, + "learning_rate": 5.5518383193274804e-06, + "loss": 0.9934, + "step": 1721 + }, + { + "epoch": 0.36, + "learning_rate": 5.551321124750321e-06, + "loss": 0.9057, + "step": 1722 + }, + { + "epoch": 0.36, + "learning_rate": 5.550803656033889e-06, + "loss": 1.1378, + "step": 1723 + }, + { + "epoch": 0.36, + "learning_rate": 5.550285913233786e-06, + "loss": 1.3755, + "step": 1724 + }, + { + "epoch": 0.36, + "learning_rate": 5.5497678964056435e-06, + "loss": 0.9297, + "step": 1725 + }, + { + "epoch": 0.36, + "learning_rate": 5.5492496056051225e-06, + "loss": 0.9288, + "step": 1726 + }, + { + "epoch": 0.36, + "learning_rate": 5.548731040887913e-06, + "loss": 0.9087, + "step": 1727 + }, + { + "epoch": 0.36, + "learning_rate": 5.548212202309735e-06, + "loss": 0.9401, + "step": 1728 + }, + { + "epoch": 0.36, + "learning_rate": 5.547693089926339e-06, + "loss": 0.8736, + "step": 1729 + }, + { + "epoch": 0.36, + "learning_rate": 5.547173703793501e-06, + "loss": 0.7821, + "step": 1730 + }, + { + "epoch": 0.36, + "learning_rate": 5.5466540439670304e-06, + "loss": 0.9998, + "step": 1731 + }, + { + "epoch": 0.36, + "learning_rate": 5.546134110502765e-06, + "loss": 0.9619, + "step": 1732 + }, + { + "epoch": 0.36, + "learning_rate": 5.545613903456571e-06, + "loss": 0.9549, + "step": 1733 + }, + { + "epoch": 0.36, + "learning_rate": 5.5450934228843445e-06, + "loss": 0.9704, + "step": 1734 + }, + { + "epoch": 0.36, + "learning_rate": 5.544572668842011e-06, + "loss": 1.0626, + "step": 1735 + }, + { + "epoch": 0.36, + "learning_rate": 5.5440516413855264e-06, + "loss": 0.9908, + "step": 1736 + }, + { + "epoch": 0.36, + "learning_rate": 5.543530340570874e-06, + "loss": 1.0535, + "step": 1737 + }, + { + "epoch": 0.36, + "learning_rate": 5.543008766454069e-06, + "loss": 0.7915, + "step": 1738 + }, + { + "epoch": 0.36, + "learning_rate": 5.542486919091152e-06, + "loss": 1.1884, + "step": 1739 + }, + { + "epoch": 0.36, + "learning_rate": 5.541964798538199e-06, + "loss": 1.0338, + "step": 1740 + }, + { + "epoch": 0.36, + "learning_rate": 5.541442404851308e-06, + "loss": 0.948, + "step": 1741 + }, + { + "epoch": 0.36, + "learning_rate": 5.5409197380866125e-06, + "loss": 0.9903, + "step": 1742 + }, + { + "epoch": 0.36, + "learning_rate": 5.540396798300272e-06, + "loss": 1.0308, + "step": 1743 + }, + { + "epoch": 0.36, + "learning_rate": 5.539873585548478e-06, + "loss": 0.9442, + "step": 1744 + }, + { + "epoch": 0.36, + "learning_rate": 5.539350099887447e-06, + "loss": 0.9928, + "step": 1745 + }, + { + "epoch": 0.36, + "learning_rate": 5.53882634137343e-06, + "loss": 0.9865, + "step": 1746 + }, + { + "epoch": 0.36, + "learning_rate": 5.538302310062703e-06, + "loss": 0.8636, + "step": 1747 + }, + { + "epoch": 0.36, + "learning_rate": 5.537778006011575e-06, + "loss": 1.0871, + "step": 1748 + }, + { + "epoch": 0.36, + "learning_rate": 5.537253429276381e-06, + "loss": 0.9433, + "step": 1749 + }, + { + "epoch": 0.36, + "learning_rate": 5.536728579913486e-06, + "loss": 1.1315, + "step": 1750 + }, + { + "epoch": 0.36, + "learning_rate": 5.536203457979288e-06, + "loss": 0.9099, + "step": 1751 + }, + { + "epoch": 0.36, + "learning_rate": 5.535678063530207e-06, + "loss": 1.0932, + "step": 1752 + }, + { + "epoch": 0.36, + "learning_rate": 5.535152396622701e-06, + "loss": 0.9387, + "step": 1753 + }, + { + "epoch": 0.36, + "learning_rate": 5.534626457313251e-06, + "loss": 1.2429, + "step": 1754 + }, + { + "epoch": 0.37, + "learning_rate": 5.534100245658367e-06, + "loss": 0.8748, + "step": 1755 + }, + { + "epoch": 0.37, + "learning_rate": 5.533573761714594e-06, + "loss": 0.869, + "step": 1756 + }, + { + "epoch": 0.37, + "learning_rate": 5.5330470055385016e-06, + "loss": 0.9427, + "step": 1757 + }, + { + "epoch": 0.37, + "learning_rate": 5.532519977186688e-06, + "loss": 0.9897, + "step": 1758 + }, + { + "epoch": 0.37, + "learning_rate": 5.531992676715784e-06, + "loss": 0.7827, + "step": 1759 + }, + { + "epoch": 0.37, + "learning_rate": 5.5314651041824474e-06, + "loss": 0.8794, + "step": 1760 + }, + { + "epoch": 0.37, + "learning_rate": 5.530937259643366e-06, + "loss": 0.9551, + "step": 1761 + }, + { + "epoch": 0.37, + "learning_rate": 5.530409143155257e-06, + "loss": 0.9972, + "step": 1762 + }, + { + "epoch": 0.37, + "learning_rate": 5.529880754774865e-06, + "loss": 1.0226, + "step": 1763 + }, + { + "epoch": 0.37, + "learning_rate": 5.529352094558969e-06, + "loss": 1.1152, + "step": 1764 + }, + { + "epoch": 0.37, + "learning_rate": 5.528823162564369e-06, + "loss": 1.0828, + "step": 1765 + }, + { + "epoch": 0.37, + "learning_rate": 5.528293958847901e-06, + "loss": 0.8319, + "step": 1766 + }, + { + "epoch": 0.37, + "learning_rate": 5.527764483466427e-06, + "loss": 1.0631, + "step": 1767 + }, + { + "epoch": 0.37, + "learning_rate": 5.527234736476841e-06, + "loss": 1.0912, + "step": 1768 + }, + { + "epoch": 0.37, + "learning_rate": 5.526704717936061e-06, + "loss": 1.0184, + "step": 1769 + }, + { + "epoch": 0.37, + "learning_rate": 5.526174427901041e-06, + "loss": 1.1469, + "step": 1770 + }, + { + "epoch": 0.37, + "learning_rate": 5.525643866428758e-06, + "loss": 1.0254, + "step": 1771 + }, + { + "epoch": 0.37, + "learning_rate": 5.525113033576222e-06, + "loss": 0.7984, + "step": 1772 + }, + { + "epoch": 0.37, + "learning_rate": 5.524581929400471e-06, + "loss": 1.1154, + "step": 1773 + }, + { + "epoch": 0.37, + "learning_rate": 5.524050553958571e-06, + "loss": 1.1021, + "step": 1774 + }, + { + "epoch": 0.37, + "learning_rate": 5.52351890730762e-06, + "loss": 0.7883, + "step": 1775 + }, + { + "epoch": 0.37, + "learning_rate": 5.522986989504742e-06, + "loss": 0.9802, + "step": 1776 + }, + { + "epoch": 0.37, + "learning_rate": 5.522454800607093e-06, + "loss": 1.0664, + "step": 1777 + }, + { + "epoch": 0.37, + "learning_rate": 5.521922340671854e-06, + "loss": 1.0583, + "step": 1778 + }, + { + "epoch": 0.37, + "learning_rate": 5.521389609756241e-06, + "loss": 0.9338, + "step": 1779 + }, + { + "epoch": 0.37, + "learning_rate": 5.520856607917494e-06, + "loss": 1.2775, + "step": 1780 + }, + { + "epoch": 0.37, + "learning_rate": 5.520323335212884e-06, + "loss": 1.198, + "step": 1781 + }, + { + "epoch": 0.37, + "learning_rate": 5.519789791699712e-06, + "loss": 0.9278, + "step": 1782 + }, + { + "epoch": 0.37, + "learning_rate": 5.519255977435305e-06, + "loss": 1.1096, + "step": 1783 + }, + { + "epoch": 0.37, + "learning_rate": 5.518721892477025e-06, + "loss": 0.9343, + "step": 1784 + }, + { + "epoch": 0.37, + "learning_rate": 5.5181875368822564e-06, + "loss": 0.8726, + "step": 1785 + }, + { + "epoch": 0.37, + "learning_rate": 5.517652910708417e-06, + "loss": 0.7923, + "step": 1786 + }, + { + "epoch": 0.37, + "learning_rate": 5.517118014012952e-06, + "loss": 1.0582, + "step": 1787 + }, + { + "epoch": 0.37, + "learning_rate": 5.516582846853336e-06, + "loss": 0.8504, + "step": 1788 + }, + { + "epoch": 0.37, + "learning_rate": 5.516047409287073e-06, + "loss": 0.8887, + "step": 1789 + }, + { + "epoch": 0.37, + "learning_rate": 5.515511701371695e-06, + "loss": 0.8711, + "step": 1790 + }, + { + "epoch": 0.37, + "learning_rate": 5.514975723164764e-06, + "loss": 0.7009, + "step": 1791 + }, + { + "epoch": 0.37, + "learning_rate": 5.514439474723872e-06, + "loss": 0.9755, + "step": 1792 + }, + { + "epoch": 0.37, + "learning_rate": 5.513902956106636e-06, + "loss": 0.9389, + "step": 1793 + }, + { + "epoch": 0.37, + "learning_rate": 5.513366167370708e-06, + "loss": 0.9317, + "step": 1794 + }, + { + "epoch": 0.37, + "learning_rate": 5.512829108573765e-06, + "loss": 0.797, + "step": 1795 + }, + { + "epoch": 0.37, + "learning_rate": 5.512291779773513e-06, + "loss": 1.0107, + "step": 1796 + }, + { + "epoch": 0.37, + "learning_rate": 5.511754181027689e-06, + "loss": 1.2645, + "step": 1797 + }, + { + "epoch": 0.37, + "learning_rate": 5.511216312394056e-06, + "loss": 0.8949, + "step": 1798 + }, + { + "epoch": 0.37, + "learning_rate": 5.510678173930411e-06, + "loss": 0.9163, + "step": 1799 + }, + { + "epoch": 0.37, + "learning_rate": 5.510139765694575e-06, + "loss": 0.8183, + "step": 1800 + }, + { + "epoch": 0.37, + "learning_rate": 5.5096010877444005e-06, + "loss": 0.7972, + "step": 1801 + }, + { + "epoch": 0.37, + "learning_rate": 5.509062140137768e-06, + "loss": 0.9791, + "step": 1802 + }, + { + "epoch": 0.38, + "learning_rate": 5.508522922932587e-06, + "loss": 0.9208, + "step": 1803 + }, + { + "epoch": 0.38, + "learning_rate": 5.507983436186797e-06, + "loss": 1.0881, + "step": 1804 + }, + { + "epoch": 0.38, + "learning_rate": 5.507443679958367e-06, + "loss": 0.8703, + "step": 1805 + }, + { + "epoch": 0.38, + "learning_rate": 5.5069036543052915e-06, + "loss": 0.8585, + "step": 1806 + }, + { + "epoch": 0.38, + "learning_rate": 5.5063633592855976e-06, + "loss": 0.8495, + "step": 1807 + }, + { + "epoch": 0.38, + "learning_rate": 5.505822794957339e-06, + "loss": 1.1456, + "step": 1808 + }, + { + "epoch": 0.38, + "learning_rate": 5.505281961378601e-06, + "loss": 0.9648, + "step": 1809 + }, + { + "epoch": 0.38, + "learning_rate": 5.504740858607494e-06, + "loss": 1.4984, + "step": 1810 + }, + { + "epoch": 0.38, + "learning_rate": 5.504199486702161e-06, + "loss": 0.9821, + "step": 1811 + }, + { + "epoch": 0.38, + "learning_rate": 5.5036578457207715e-06, + "loss": 1.0686, + "step": 1812 + }, + { + "epoch": 0.38, + "learning_rate": 5.503115935721526e-06, + "loss": 0.8111, + "step": 1813 + }, + { + "epoch": 0.38, + "learning_rate": 5.50257375676265e-06, + "loss": 0.8554, + "step": 1814 + }, + { + "epoch": 0.38, + "learning_rate": 5.502031308902404e-06, + "loss": 0.9558, + "step": 1815 + }, + { + "epoch": 0.38, + "learning_rate": 5.501488592199072e-06, + "loss": 0.9142, + "step": 1816 + }, + { + "epoch": 0.38, + "learning_rate": 5.500945606710969e-06, + "loss": 0.8669, + "step": 1817 + }, + { + "epoch": 0.38, + "learning_rate": 5.500402352496439e-06, + "loss": 0.8755, + "step": 1818 + }, + { + "epoch": 0.38, + "learning_rate": 5.499858829613856e-06, + "loss": 0.8915, + "step": 1819 + }, + { + "epoch": 0.38, + "learning_rate": 5.499315038121618e-06, + "loss": 1.1692, + "step": 1820 + }, + { + "epoch": 0.38, + "learning_rate": 5.498770978078159e-06, + "loss": 0.8042, + "step": 1821 + }, + { + "epoch": 0.38, + "learning_rate": 5.4982266495419365e-06, + "loss": 1.0297, + "step": 1822 + }, + { + "epoch": 0.38, + "learning_rate": 5.4976820525714385e-06, + "loss": 0.8719, + "step": 1823 + }, + { + "epoch": 0.38, + "learning_rate": 5.497137187225182e-06, + "loss": 0.9056, + "step": 1824 + }, + { + "epoch": 0.38, + "learning_rate": 5.496592053561713e-06, + "loss": 1.0025, + "step": 1825 + }, + { + "epoch": 0.38, + "learning_rate": 5.496046651639607e-06, + "loss": 1.1314, + "step": 1826 + }, + { + "epoch": 0.38, + "learning_rate": 5.495500981517465e-06, + "loss": 0.9233, + "step": 1827 + }, + { + "epoch": 0.38, + "learning_rate": 5.4949550432539206e-06, + "loss": 0.9924, + "step": 1828 + }, + { + "epoch": 0.38, + "learning_rate": 5.494408836907636e-06, + "loss": 0.8188, + "step": 1829 + }, + { + "epoch": 0.38, + "learning_rate": 5.4938623625373e-06, + "loss": 0.9573, + "step": 1830 + }, + { + "epoch": 0.38, + "learning_rate": 5.493315620201629e-06, + "loss": 1.1856, + "step": 1831 + }, + { + "epoch": 0.38, + "learning_rate": 5.492768609959374e-06, + "loss": 1.0134, + "step": 1832 + }, + { + "epoch": 0.38, + "learning_rate": 5.49222133186931e-06, + "loss": 0.9265, + "step": 1833 + }, + { + "epoch": 0.38, + "learning_rate": 5.491673785990241e-06, + "loss": 0.8263, + "step": 1834 + }, + { + "epoch": 0.38, + "learning_rate": 5.491125972381002e-06, + "loss": 1.0469, + "step": 1835 + }, + { + "epoch": 0.38, + "learning_rate": 5.4905778911004555e-06, + "loss": 0.831, + "step": 1836 + }, + { + "epoch": 0.38, + "learning_rate": 5.4900295422074915e-06, + "loss": 1.004, + "step": 1837 + }, + { + "epoch": 0.38, + "learning_rate": 5.489480925761031e-06, + "loss": 1.141, + "step": 1838 + }, + { + "epoch": 0.38, + "learning_rate": 5.4889320418200225e-06, + "loss": 0.9205, + "step": 1839 + }, + { + "epoch": 0.38, + "learning_rate": 5.488382890443444e-06, + "loss": 0.7996, + "step": 1840 + }, + { + "epoch": 0.38, + "learning_rate": 5.4878334716903015e-06, + "loss": 1.0184, + "step": 1841 + }, + { + "epoch": 0.38, + "learning_rate": 5.48728378561963e-06, + "loss": 1.2054, + "step": 1842 + }, + { + "epoch": 0.38, + "learning_rate": 5.486733832290494e-06, + "loss": 0.9128, + "step": 1843 + }, + { + "epoch": 0.38, + "learning_rate": 5.486183611761983e-06, + "loss": 0.8991, + "step": 1844 + }, + { + "epoch": 0.38, + "learning_rate": 5.485633124093223e-06, + "loss": 0.7582, + "step": 1845 + }, + { + "epoch": 0.38, + "learning_rate": 5.485082369343359e-06, + "loss": 1.0399, + "step": 1846 + }, + { + "epoch": 0.38, + "learning_rate": 5.484531347571573e-06, + "loss": 0.7783, + "step": 1847 + }, + { + "epoch": 0.38, + "learning_rate": 5.483980058837071e-06, + "loss": 0.8195, + "step": 1848 + }, + { + "epoch": 0.38, + "learning_rate": 5.483428503199087e-06, + "loss": 0.7576, + "step": 1849 + }, + { + "epoch": 0.38, + "learning_rate": 5.48287668071689e-06, + "loss": 0.8249, + "step": 1850 + }, + { + "epoch": 0.38, + "learning_rate": 5.482324591449769e-06, + "loss": 0.7686, + "step": 1851 + }, + { + "epoch": 0.39, + "learning_rate": 5.48177223545705e-06, + "loss": 1.0496, + "step": 1852 + }, + { + "epoch": 0.39, + "learning_rate": 5.481219612798079e-06, + "loss": 0.8215, + "step": 1853 + }, + { + "epoch": 0.39, + "learning_rate": 5.4806667235322395e-06, + "loss": 1.1949, + "step": 1854 + }, + { + "epoch": 0.39, + "learning_rate": 5.480113567718937e-06, + "loss": 1.1126, + "step": 1855 + }, + { + "epoch": 0.39, + "learning_rate": 5.479560145417608e-06, + "loss": 0.7972, + "step": 1856 + }, + { + "epoch": 0.39, + "learning_rate": 5.47900645668772e-06, + "loss": 1.0195, + "step": 1857 + }, + { + "epoch": 0.39, + "learning_rate": 5.478452501588763e-06, + "loss": 0.8229, + "step": 1858 + }, + { + "epoch": 0.39, + "learning_rate": 5.477898280180262e-06, + "loss": 0.9148, + "step": 1859 + }, + { + "epoch": 0.39, + "learning_rate": 5.477343792521769e-06, + "loss": 0.9664, + "step": 1860 + }, + { + "epoch": 0.39, + "learning_rate": 5.47678903867286e-06, + "loss": 0.9086, + "step": 1861 + }, + { + "epoch": 0.39, + "learning_rate": 5.476234018693148e-06, + "loss": 1.0583, + "step": 1862 + }, + { + "epoch": 0.39, + "learning_rate": 5.4756787326422656e-06, + "loss": 1.1682, + "step": 1863 + }, + { + "epoch": 0.39, + "learning_rate": 5.4751231805798806e-06, + "loss": 1.0053, + "step": 1864 + }, + { + "epoch": 0.39, + "learning_rate": 5.474567362565687e-06, + "loss": 1.0072, + "step": 1865 + }, + { + "epoch": 0.39, + "learning_rate": 5.474011278659406e-06, + "loss": 1.1222, + "step": 1866 + }, + { + "epoch": 0.39, + "learning_rate": 5.47345492892079e-06, + "loss": 0.8537, + "step": 1867 + }, + { + "epoch": 0.39, + "learning_rate": 5.472898313409618e-06, + "loss": 0.823, + "step": 1868 + }, + { + "epoch": 0.39, + "learning_rate": 5.4723414321857e-06, + "loss": 0.9539, + "step": 1869 + }, + { + "epoch": 0.39, + "learning_rate": 5.471784285308871e-06, + "loss": 0.9165, + "step": 1870 + }, + { + "epoch": 0.39, + "learning_rate": 5.471226872838997e-06, + "loss": 0.9209, + "step": 1871 + }, + { + "epoch": 0.39, + "learning_rate": 5.4706691948359705e-06, + "loss": 0.8952, + "step": 1872 + }, + { + "epoch": 0.39, + "learning_rate": 5.470111251359716e-06, + "loss": 0.8653, + "step": 1873 + }, + { + "epoch": 0.39, + "learning_rate": 5.469553042470184e-06, + "loss": 0.9445, + "step": 1874 + }, + { + "epoch": 0.39, + "learning_rate": 5.468994568227354e-06, + "loss": 0.9305, + "step": 1875 + }, + { + "epoch": 0.39, + "learning_rate": 5.468435828691234e-06, + "loss": 1.0816, + "step": 1876 + }, + { + "epoch": 0.39, + "learning_rate": 5.46787682392186e-06, + "loss": 1.0109, + "step": 1877 + }, + { + "epoch": 0.39, + "learning_rate": 5.467317553979298e-06, + "loss": 0.9169, + "step": 1878 + }, + { + "epoch": 0.39, + "learning_rate": 5.466758018923639e-06, + "loss": 1.0254, + "step": 1879 + }, + { + "epoch": 0.39, + "learning_rate": 5.4661982188150096e-06, + "loss": 1.1398, + "step": 1880 + }, + { + "epoch": 0.39, + "learning_rate": 5.465638153713555e-06, + "loss": 0.9408, + "step": 1881 + }, + { + "epoch": 0.39, + "learning_rate": 5.465077823679459e-06, + "loss": 0.9106, + "step": 1882 + }, + { + "epoch": 0.39, + "learning_rate": 5.464517228772925e-06, + "loss": 0.9653, + "step": 1883 + }, + { + "epoch": 0.39, + "learning_rate": 5.4639563690541925e-06, + "loss": 0.8979, + "step": 1884 + }, + { + "epoch": 0.39, + "learning_rate": 5.463395244583523e-06, + "loss": 1.1831, + "step": 1885 + }, + { + "epoch": 0.39, + "learning_rate": 5.462833855421209e-06, + "loss": 1.0886, + "step": 1886 + }, + { + "epoch": 0.39, + "learning_rate": 5.462272201627575e-06, + "loss": 0.793, + "step": 1887 + }, + { + "epoch": 0.39, + "learning_rate": 5.461710283262966e-06, + "loss": 1.0232, + "step": 1888 + }, + { + "epoch": 0.39, + "learning_rate": 5.4611481003877654e-06, + "loss": 0.8853, + "step": 1889 + }, + { + "epoch": 0.39, + "learning_rate": 5.460585653062377e-06, + "loss": 0.9787, + "step": 1890 + }, + { + "epoch": 0.39, + "learning_rate": 5.460022941347234e-06, + "loss": 1.1529, + "step": 1891 + }, + { + "epoch": 0.39, + "learning_rate": 5.459459965302802e-06, + "loss": 0.8324, + "step": 1892 + }, + { + "epoch": 0.39, + "learning_rate": 5.458896724989572e-06, + "loss": 0.7484, + "step": 1893 + }, + { + "epoch": 0.39, + "learning_rate": 5.458333220468066e-06, + "loss": 0.8327, + "step": 1894 + }, + { + "epoch": 0.39, + "learning_rate": 5.457769451798829e-06, + "loss": 0.921, + "step": 1895 + }, + { + "epoch": 0.39, + "learning_rate": 5.457205419042441e-06, + "loss": 0.8538, + "step": 1896 + }, + { + "epoch": 0.39, + "learning_rate": 5.456641122259506e-06, + "loss": 1.1132, + "step": 1897 + }, + { + "epoch": 0.39, + "learning_rate": 5.456076561510657e-06, + "loss": 1.0289, + "step": 1898 + }, + { + "epoch": 0.39, + "learning_rate": 5.455511736856557e-06, + "loss": 0.9874, + "step": 1899 + }, + { + "epoch": 0.4, + "learning_rate": 5.454946648357896e-06, + "loss": 0.9705, + "step": 1900 + }, + { + "epoch": 0.4, + "learning_rate": 5.4543812960753924e-06, + "loss": 1.1799, + "step": 1901 + }, + { + "epoch": 0.4, + "learning_rate": 5.453815680069795e-06, + "loss": 0.9194, + "step": 1902 + }, + { + "epoch": 0.4, + "learning_rate": 5.453249800401877e-06, + "loss": 1.214, + "step": 1903 + }, + { + "epoch": 0.4, + "learning_rate": 5.4526836571324436e-06, + "loss": 1.0368, + "step": 1904 + }, + { + "epoch": 0.4, + "learning_rate": 5.452117250322325e-06, + "loss": 0.9438, + "step": 1905 + }, + { + "epoch": 0.4, + "learning_rate": 5.451550580032384e-06, + "loss": 1.0484, + "step": 1906 + }, + { + "epoch": 0.4, + "learning_rate": 5.4509836463235075e-06, + "loss": 1.011, + "step": 1907 + }, + { + "epoch": 0.4, + "learning_rate": 5.450416449256614e-06, + "loss": 0.8974, + "step": 1908 + }, + { + "epoch": 0.4, + "learning_rate": 5.449848988892647e-06, + "loss": 0.8694, + "step": 1909 + }, + { + "epoch": 0.4, + "learning_rate": 5.449281265292581e-06, + "loss": 0.8961, + "step": 1910 + }, + { + "epoch": 0.4, + "learning_rate": 5.448713278517418e-06, + "loss": 1.1372, + "step": 1911 + }, + { + "epoch": 0.4, + "learning_rate": 5.4481450286281874e-06, + "loss": 0.8131, + "step": 1912 + }, + { + "epoch": 0.4, + "learning_rate": 5.447576515685949e-06, + "loss": 0.9954, + "step": 1913 + }, + { + "epoch": 0.4, + "learning_rate": 5.447007739751788e-06, + "loss": 1.0506, + "step": 1914 + }, + { + "epoch": 0.4, + "learning_rate": 5.446438700886821e-06, + "loss": 0.8486, + "step": 1915 + }, + { + "epoch": 0.4, + "learning_rate": 5.445869399152189e-06, + "loss": 1.0307, + "step": 1916 + }, + { + "epoch": 0.4, + "learning_rate": 5.445299834609066e-06, + "loss": 1.0452, + "step": 1917 + }, + { + "epoch": 0.4, + "learning_rate": 5.444730007318649e-06, + "loss": 0.907, + "step": 1918 + }, + { + "epoch": 0.4, + "learning_rate": 5.444159917342167e-06, + "loss": 1.0056, + "step": 1919 + }, + { + "epoch": 0.4, + "learning_rate": 5.443589564740877e-06, + "loss": 0.8903, + "step": 1920 + }, + { + "epoch": 0.4, + "learning_rate": 5.443018949576063e-06, + "loss": 1.2022, + "step": 1921 + }, + { + "epoch": 0.4, + "learning_rate": 5.442448071909037e-06, + "loss": 1.0649, + "step": 1922 + }, + { + "epoch": 0.4, + "learning_rate": 5.441876931801139e-06, + "loss": 1.0977, + "step": 1923 + }, + { + "epoch": 0.4, + "learning_rate": 5.44130552931374e-06, + "loss": 0.9511, + "step": 1924 + }, + { + "epoch": 0.4, + "learning_rate": 5.440733864508235e-06, + "loss": 0.7768, + "step": 1925 + }, + { + "epoch": 0.4, + "learning_rate": 5.440161937446052e-06, + "loss": 1.1062, + "step": 1926 + }, + { + "epoch": 0.4, + "learning_rate": 5.439589748188641e-06, + "loss": 0.8558, + "step": 1927 + }, + { + "epoch": 0.4, + "learning_rate": 5.439017296797487e-06, + "loss": 0.8351, + "step": 1928 + }, + { + "epoch": 0.4, + "learning_rate": 5.4384445833340985e-06, + "loss": 1.0016, + "step": 1929 + }, + { + "epoch": 0.4, + "learning_rate": 5.437871607860013e-06, + "loss": 1.0766, + "step": 1930 + }, + { + "epoch": 0.4, + "learning_rate": 5.437298370436797e-06, + "loss": 0.8905, + "step": 1931 + }, + { + "epoch": 0.4, + "learning_rate": 5.436724871126045e-06, + "loss": 0.8563, + "step": 1932 + }, + { + "epoch": 0.4, + "learning_rate": 5.43615110998938e-06, + "loss": 1.119, + "step": 1933 + }, + { + "epoch": 0.4, + "learning_rate": 5.435577087088453e-06, + "loss": 0.9224, + "step": 1934 + }, + { + "epoch": 0.4, + "learning_rate": 5.43500280248494e-06, + "loss": 0.9588, + "step": 1935 + }, + { + "epoch": 0.4, + "learning_rate": 5.434428256240551e-06, + "loss": 0.8386, + "step": 1936 + }, + { + "epoch": 0.4, + "learning_rate": 5.433853448417019e-06, + "loss": 0.9214, + "step": 1937 + }, + { + "epoch": 0.4, + "learning_rate": 5.4332783790761065e-06, + "loss": 0.84, + "step": 1938 + }, + { + "epoch": 0.4, + "learning_rate": 5.432703048279607e-06, + "loss": 1.0976, + "step": 1939 + }, + { + "epoch": 0.4, + "learning_rate": 5.432127456089339e-06, + "loss": 0.8978, + "step": 1940 + }, + { + "epoch": 0.4, + "learning_rate": 5.431551602567149e-06, + "loss": 0.848, + "step": 1941 + }, + { + "epoch": 0.4, + "learning_rate": 5.430975487774912e-06, + "loss": 0.8924, + "step": 1942 + }, + { + "epoch": 0.4, + "learning_rate": 5.430399111774534e-06, + "loss": 1.0204, + "step": 1943 + }, + { + "epoch": 0.4, + "learning_rate": 5.429822474627943e-06, + "loss": 0.8708, + "step": 1944 + }, + { + "epoch": 0.4, + "learning_rate": 5.429245576397102e-06, + "loss": 1.068, + "step": 1945 + }, + { + "epoch": 0.4, + "learning_rate": 5.428668417143996e-06, + "loss": 0.9335, + "step": 1946 + }, + { + "epoch": 0.4, + "learning_rate": 5.428090996930642e-06, + "loss": 0.7742, + "step": 1947 + }, + { + "epoch": 0.41, + "learning_rate": 5.4275133158190845e-06, + "loss": 1.1121, + "step": 1948 + }, + { + "epoch": 0.41, + "learning_rate": 5.4269353738713944e-06, + "loss": 0.8645, + "step": 1949 + }, + { + "epoch": 0.41, + "learning_rate": 5.426357171149671e-06, + "loss": 0.9939, + "step": 1950 + }, + { + "epoch": 0.41, + "learning_rate": 5.4257787077160424e-06, + "loss": 0.9021, + "step": 1951 + }, + { + "epoch": 0.41, + "learning_rate": 5.425199983632666e-06, + "loss": 0.9096, + "step": 1952 + }, + { + "epoch": 0.41, + "learning_rate": 5.424620998961723e-06, + "loss": 0.9227, + "step": 1953 + }, + { + "epoch": 0.41, + "learning_rate": 5.4240417537654275e-06, + "loss": 0.8134, + "step": 1954 + }, + { + "epoch": 0.41, + "learning_rate": 5.423462248106018e-06, + "loss": 0.913, + "step": 1955 + }, + { + "epoch": 0.41, + "learning_rate": 5.4228824820457635e-06, + "loss": 0.9067, + "step": 1956 + }, + { + "epoch": 0.41, + "learning_rate": 5.4223024556469585e-06, + "loss": 1.062, + "step": 1957 + }, + { + "epoch": 0.41, + "learning_rate": 5.421722168971928e-06, + "loss": 0.9732, + "step": 1958 + }, + { + "epoch": 0.41, + "learning_rate": 5.421141622083024e-06, + "loss": 0.7693, + "step": 1959 + }, + { + "epoch": 0.41, + "learning_rate": 5.420560815042624e-06, + "loss": 1.1427, + "step": 1960 + }, + { + "epoch": 0.41, + "learning_rate": 5.419979747913138e-06, + "loss": 1.091, + "step": 1961 + }, + { + "epoch": 0.41, + "learning_rate": 5.419398420757e-06, + "loss": 0.9646, + "step": 1962 + }, + { + "epoch": 0.41, + "learning_rate": 5.418816833636676e-06, + "loss": 0.8042, + "step": 1963 + }, + { + "epoch": 0.41, + "learning_rate": 5.418234986614653e-06, + "loss": 0.9498, + "step": 1964 + }, + { + "epoch": 0.41, + "learning_rate": 5.417652879753456e-06, + "loss": 0.9812, + "step": 1965 + }, + { + "epoch": 0.41, + "learning_rate": 5.417070513115628e-06, + "loss": 0.8327, + "step": 1966 + }, + { + "epoch": 0.41, + "learning_rate": 5.416487886763747e-06, + "loss": 1.1095, + "step": 1967 + }, + { + "epoch": 0.41, + "learning_rate": 5.415905000760413e-06, + "loss": 1.1187, + "step": 1968 + }, + { + "epoch": 0.41, + "learning_rate": 5.415321855168262e-06, + "loss": 1.1365, + "step": 1969 + }, + { + "epoch": 0.41, + "learning_rate": 5.414738450049948e-06, + "loss": 0.7954, + "step": 1970 + }, + { + "epoch": 0.41, + "learning_rate": 5.41415478546816e-06, + "loss": 1.1697, + "step": 1971 + }, + { + "epoch": 0.41, + "learning_rate": 5.413570861485613e-06, + "loss": 1.0876, + "step": 1972 + }, + { + "epoch": 0.41, + "learning_rate": 5.412986678165049e-06, + "loss": 0.9512, + "step": 1973 + }, + { + "epoch": 0.41, + "learning_rate": 5.412402235569239e-06, + "loss": 1.0945, + "step": 1974 + }, + { + "epoch": 0.41, + "learning_rate": 5.41181753376098e-06, + "loss": 0.9918, + "step": 1975 + }, + { + "epoch": 0.41, + "learning_rate": 5.4112325728031e-06, + "loss": 0.9063, + "step": 1976 + }, + { + "epoch": 0.41, + "learning_rate": 5.410647352758452e-06, + "loss": 0.8894, + "step": 1977 + }, + { + "epoch": 0.41, + "learning_rate": 5.410061873689918e-06, + "loss": 0.9111, + "step": 1978 + }, + { + "epoch": 0.41, + "learning_rate": 5.409476135660407e-06, + "loss": 0.9663, + "step": 1979 + }, + { + "epoch": 0.41, + "learning_rate": 5.408890138732858e-06, + "loss": 0.8124, + "step": 1980 + }, + { + "epoch": 0.41, + "learning_rate": 5.4083038829702354e-06, + "loss": 1.0365, + "step": 1981 + }, + { + "epoch": 0.41, + "learning_rate": 5.407717368435532e-06, + "loss": 0.7885, + "step": 1982 + }, + { + "epoch": 0.41, + "learning_rate": 5.407130595191769e-06, + "loss": 1.0983, + "step": 1983 + }, + { + "epoch": 0.41, + "learning_rate": 5.406543563301996e-06, + "loss": 0.9095, + "step": 1984 + }, + { + "epoch": 0.41, + "learning_rate": 5.405956272829288e-06, + "loss": 0.9744, + "step": 1985 + }, + { + "epoch": 0.41, + "learning_rate": 5.40536872383675e-06, + "loss": 0.9156, + "step": 1986 + }, + { + "epoch": 0.41, + "learning_rate": 5.404780916387515e-06, + "loss": 0.8263, + "step": 1987 + }, + { + "epoch": 0.41, + "learning_rate": 5.404192850544742e-06, + "loss": 0.8564, + "step": 1988 + }, + { + "epoch": 0.41, + "learning_rate": 5.4036045263716195e-06, + "loss": 0.7952, + "step": 1989 + }, + { + "epoch": 0.41, + "learning_rate": 5.403015943931361e-06, + "loss": 1.0267, + "step": 1990 + }, + { + "epoch": 0.41, + "learning_rate": 5.402427103287211e-06, + "loss": 0.8345, + "step": 1991 + }, + { + "epoch": 0.41, + "learning_rate": 5.40183800450244e-06, + "loss": 1.1449, + "step": 1992 + }, + { + "epoch": 0.41, + "learning_rate": 5.4012486476403485e-06, + "loss": 0.8897, + "step": 1993 + }, + { + "epoch": 0.41, + "learning_rate": 5.4006590327642595e-06, + "loss": 0.9408, + "step": 1994 + }, + { + "epoch": 0.41, + "learning_rate": 5.400069159937529e-06, + "loss": 1.1026, + "step": 1995 + }, + { + "epoch": 0.42, + "learning_rate": 5.3994790292235385e-06, + "loss": 0.8509, + "step": 1996 + }, + { + "epoch": 0.42, + "learning_rate": 5.398888640685698e-06, + "loss": 0.9691, + "step": 1997 + }, + { + "epoch": 0.42, + "learning_rate": 5.398297994387444e-06, + "loss": 1.0327, + "step": 1998 + }, + { + "epoch": 0.42, + "learning_rate": 5.397707090392242e-06, + "loss": 1.0577, + "step": 1999 + }, + { + "epoch": 0.42, + "learning_rate": 5.397115928763583e-06, + "loss": 0.8344, + "step": 2000 + }, + { + "epoch": 0.42, + "eval_loss": NaN, + "eval_runtime": 15.0286, + "eval_samples_per_second": 352.327, + "eval_steps_per_second": 44.049, + "step": 2000 + }, + { + "epoch": 0.42, + "learning_rate": 5.396524509564989e-06, + "loss": 0.9744, + "step": 2001 + }, + { + "epoch": 0.42, + "learning_rate": 5.395932832860008e-06, + "loss": 1.0089, + "step": 2002 + }, + { + "epoch": 0.42, + "learning_rate": 5.395340898712215e-06, + "loss": 0.8524, + "step": 2003 + }, + { + "epoch": 0.42, + "learning_rate": 5.394748707185213e-06, + "loss": 0.7612, + "step": 2004 + }, + { + "epoch": 0.42, + "learning_rate": 5.394156258342634e-06, + "loss": 0.9477, + "step": 2005 + }, + { + "epoch": 0.42, + "learning_rate": 5.393563552248136e-06, + "loss": 0.9264, + "step": 2006 + }, + { + "epoch": 0.42, + "learning_rate": 5.392970588965404e-06, + "loss": 1.0177, + "step": 2007 + }, + { + "epoch": 0.42, + "learning_rate": 5.3923773685581535e-06, + "loss": 1.2797, + "step": 2008 + }, + { + "epoch": 0.42, + "learning_rate": 5.391783891090125e-06, + "loss": 1.0346, + "step": 2009 + }, + { + "epoch": 0.42, + "learning_rate": 5.391190156625089e-06, + "loss": 0.8052, + "step": 2010 + }, + { + "epoch": 0.42, + "learning_rate": 5.3905961652268406e-06, + "loss": 0.8956, + "step": 2011 + }, + { + "epoch": 0.42, + "learning_rate": 5.390001916959205e-06, + "loss": 1.025, + "step": 2012 + }, + { + "epoch": 0.42, + "learning_rate": 5.389407411886034e-06, + "loss": 1.0725, + "step": 2013 + }, + { + "epoch": 0.42, + "learning_rate": 5.3888126500712074e-06, + "loss": 0.9196, + "step": 2014 + }, + { + "epoch": 0.42, + "learning_rate": 5.3882176315786315e-06, + "loss": 0.9083, + "step": 2015 + }, + { + "epoch": 0.42, + "learning_rate": 5.387622356472242e-06, + "loss": 0.9892, + "step": 2016 + }, + { + "epoch": 0.42, + "learning_rate": 5.387026824816e-06, + "loss": 0.8558, + "step": 2017 + }, + { + "epoch": 0.42, + "learning_rate": 5.386431036673897e-06, + "loss": 0.8412, + "step": 2018 + }, + { + "epoch": 0.42, + "learning_rate": 5.385834992109948e-06, + "loss": 0.8734, + "step": 2019 + }, + { + "epoch": 0.42, + "learning_rate": 5.385238691188201e-06, + "loss": 0.9499, + "step": 2020 + }, + { + "epoch": 0.42, + "learning_rate": 5.384642133972725e-06, + "loss": 1.2012, + "step": 2021 + }, + { + "epoch": 0.42, + "learning_rate": 5.384045320527623e-06, + "loss": 0.7914, + "step": 2022 + }, + { + "epoch": 0.42, + "learning_rate": 5.383448250917022e-06, + "loss": 0.9789, + "step": 2023 + }, + { + "epoch": 0.42, + "learning_rate": 5.3828509252050755e-06, + "loss": 0.9152, + "step": 2024 + }, + { + "epoch": 0.42, + "learning_rate": 5.382253343455967e-06, + "loss": 0.8777, + "step": 2025 + }, + { + "epoch": 0.42, + "learning_rate": 5.381655505733908e-06, + "loss": 1.0393, + "step": 2026 + }, + { + "epoch": 0.42, + "learning_rate": 5.381057412103134e-06, + "loss": 0.9291, + "step": 2027 + }, + { + "epoch": 0.42, + "learning_rate": 5.380459062627911e-06, + "loss": 1.026, + "step": 2028 + }, + { + "epoch": 0.42, + "learning_rate": 5.379860457372531e-06, + "loss": 1.091, + "step": 2029 + }, + { + "epoch": 0.42, + "learning_rate": 5.379261596401316e-06, + "loss": 0.862, + "step": 2030 + }, + { + "epoch": 0.42, + "learning_rate": 5.378662479778611e-06, + "loss": 1.2462, + "step": 2031 + }, + { + "epoch": 0.42, + "learning_rate": 5.378063107568793e-06, + "loss": 0.8137, + "step": 2032 + }, + { + "epoch": 0.42, + "learning_rate": 5.3774634798362646e-06, + "loss": 1.0157, + "step": 2033 + }, + { + "epoch": 0.42, + "learning_rate": 5.376863596645453e-06, + "loss": 1.0534, + "step": 2034 + }, + { + "epoch": 0.42, + "learning_rate": 5.376263458060819e-06, + "loss": 0.9251, + "step": 2035 + }, + { + "epoch": 0.42, + "learning_rate": 5.375663064146846e-06, + "loss": 1.0599, + "step": 2036 + }, + { + "epoch": 0.42, + "learning_rate": 5.375062414968045e-06, + "loss": 1.1351, + "step": 2037 + }, + { + "epoch": 0.42, + "learning_rate": 5.374461510588958e-06, + "loss": 0.9465, + "step": 2038 + }, + { + "epoch": 0.42, + "learning_rate": 5.37386035107415e-06, + "loss": 1.0234, + "step": 2039 + }, + { + "epoch": 0.42, + "learning_rate": 5.373258936488218e-06, + "loss": 0.9426, + "step": 2040 + }, + { + "epoch": 0.42, + "learning_rate": 5.372657266895783e-06, + "loss": 0.9145, + "step": 2041 + }, + { + "epoch": 0.42, + "learning_rate": 5.372055342361493e-06, + "loss": 1.0325, + "step": 2042 + }, + { + "epoch": 0.42, + "learning_rate": 5.371453162950026e-06, + "loss": 0.9737, + "step": 2043 + }, + { + "epoch": 0.43, + "learning_rate": 5.370850728726086e-06, + "loss": 0.851, + "step": 2044 + }, + { + "epoch": 0.43, + "learning_rate": 5.370248039754404e-06, + "loss": 0.8591, + "step": 2045 + }, + { + "epoch": 0.43, + "learning_rate": 5.3696450960997385e-06, + "loss": 1.0886, + "step": 2046 + }, + { + "epoch": 0.43, + "learning_rate": 5.369041897826877e-06, + "loss": 0.9065, + "step": 2047 + }, + { + "epoch": 0.43, + "learning_rate": 5.368438445000634e-06, + "loss": 0.9592, + "step": 2048 + }, + { + "epoch": 0.43, + "learning_rate": 5.367834737685848e-06, + "loss": 1.0103, + "step": 2049 + }, + { + "epoch": 0.43, + "learning_rate": 5.367230775947388e-06, + "loss": 1.0671, + "step": 2050 + }, + { + "epoch": 0.43, + "learning_rate": 5.366626559850152e-06, + "loss": 1.0871, + "step": 2051 + }, + { + "epoch": 0.43, + "learning_rate": 5.366022089459059e-06, + "loss": 0.9994, + "step": 2052 + }, + { + "epoch": 0.43, + "learning_rate": 5.365417364839062e-06, + "loss": 0.8809, + "step": 2053 + }, + { + "epoch": 0.43, + "learning_rate": 5.364812386055138e-06, + "loss": 1.1061, + "step": 2054 + }, + { + "epoch": 0.43, + "learning_rate": 5.364207153172291e-06, + "loss": 1.0178, + "step": 2055 + }, + { + "epoch": 0.43, + "learning_rate": 5.3636016662555554e-06, + "loss": 0.9353, + "step": 2056 + }, + { + "epoch": 0.43, + "learning_rate": 5.362995925369989e-06, + "loss": 0.926, + "step": 2057 + }, + { + "epoch": 0.43, + "learning_rate": 5.3623899305806786e-06, + "loss": 1.0147, + "step": 2058 + }, + { + "epoch": 0.43, + "learning_rate": 5.36178368195274e-06, + "loss": 0.9483, + "step": 2059 + }, + { + "epoch": 0.43, + "learning_rate": 5.361177179551312e-06, + "loss": 1.1992, + "step": 2060 + }, + { + "epoch": 0.43, + "learning_rate": 5.360570423441564e-06, + "loss": 0.8016, + "step": 2061 + }, + { + "epoch": 0.43, + "learning_rate": 5.359963413688694e-06, + "loss": 0.9437, + "step": 2062 + }, + { + "epoch": 0.43, + "learning_rate": 5.3593561503579225e-06, + "loss": 0.9806, + "step": 2063 + }, + { + "epoch": 0.43, + "learning_rate": 5.358748633514502e-06, + "loss": 1.1723, + "step": 2064 + }, + { + "epoch": 0.43, + "learning_rate": 5.358140863223708e-06, + "loss": 0.8871, + "step": 2065 + }, + { + "epoch": 0.43, + "learning_rate": 5.357532839550848e-06, + "loss": 0.8116, + "step": 2066 + }, + { + "epoch": 0.43, + "learning_rate": 5.356924562561251e-06, + "loss": 0.7259, + "step": 2067 + }, + { + "epoch": 0.43, + "learning_rate": 5.356316032320279e-06, + "loss": 0.9829, + "step": 2068 + }, + { + "epoch": 0.43, + "learning_rate": 5.355707248893318e-06, + "loss": 0.9762, + "step": 2069 + }, + { + "epoch": 0.43, + "learning_rate": 5.355098212345779e-06, + "loss": 0.8791, + "step": 2070 + }, + { + "epoch": 0.43, + "learning_rate": 5.354488922743107e-06, + "loss": 0.9837, + "step": 2071 + }, + { + "epoch": 0.43, + "learning_rate": 5.353879380150768e-06, + "loss": 0.8277, + "step": 2072 + }, + { + "epoch": 0.43, + "learning_rate": 5.353269584634257e-06, + "loss": 0.8697, + "step": 2073 + }, + { + "epoch": 0.43, + "learning_rate": 5.352659536259098e-06, + "loss": 1.1657, + "step": 2074 + }, + { + "epoch": 0.43, + "learning_rate": 5.352049235090839e-06, + "loss": 1.0837, + "step": 2075 + }, + { + "epoch": 0.43, + "learning_rate": 5.3514386811950574e-06, + "loss": 0.9615, + "step": 2076 + }, + { + "epoch": 0.43, + "learning_rate": 5.350827874637358e-06, + "loss": 1.2017, + "step": 2077 + }, + { + "epoch": 0.43, + "learning_rate": 5.350216815483371e-06, + "loss": 0.8883, + "step": 2078 + }, + { + "epoch": 0.43, + "learning_rate": 5.3496055037987535e-06, + "loss": 0.9686, + "step": 2079 + }, + { + "epoch": 0.43, + "learning_rate": 5.348993939649194e-06, + "loss": 1.0088, + "step": 2080 + }, + { + "epoch": 0.43, + "learning_rate": 5.348382123100403e-06, + "loss": 1.2405, + "step": 2081 + }, + { + "epoch": 0.43, + "learning_rate": 5.34777005421812e-06, + "loss": 0.8991, + "step": 2082 + }, + { + "epoch": 0.43, + "learning_rate": 5.347157733068113e-06, + "loss": 1.2714, + "step": 2083 + }, + { + "epoch": 0.43, + "learning_rate": 5.346545159716174e-06, + "loss": 0.9427, + "step": 2084 + }, + { + "epoch": 0.43, + "learning_rate": 5.345932334228125e-06, + "loss": 0.9592, + "step": 2085 + }, + { + "epoch": 0.43, + "learning_rate": 5.345319256669814e-06, + "loss": 0.8065, + "step": 2086 + }, + { + "epoch": 0.43, + "learning_rate": 5.344705927107118e-06, + "loss": 1.0332, + "step": 2087 + }, + { + "epoch": 0.43, + "learning_rate": 5.344092345605936e-06, + "loss": 0.9296, + "step": 2088 + }, + { + "epoch": 0.43, + "learning_rate": 5.3434785122321986e-06, + "loss": 0.8416, + "step": 2089 + }, + { + "epoch": 0.43, + "learning_rate": 5.342864427051864e-06, + "loss": 0.7472, + "step": 2090 + }, + { + "epoch": 0.43, + "learning_rate": 5.342250090130912e-06, + "loss": 0.7449, + "step": 2091 + }, + { + "epoch": 0.44, + "learning_rate": 5.341635501535355e-06, + "loss": 0.8141, + "step": 2092 + }, + { + "epoch": 0.44, + "learning_rate": 5.341020661331231e-06, + "loss": 0.9602, + "step": 2093 + }, + { + "epoch": 0.44, + "learning_rate": 5.340405569584603e-06, + "loss": 1.0518, + "step": 2094 + }, + { + "epoch": 0.44, + "learning_rate": 5.339790226361565e-06, + "loss": 0.9843, + "step": 2095 + }, + { + "epoch": 0.44, + "learning_rate": 5.3391746317282345e-06, + "loss": 0.9381, + "step": 2096 + }, + { + "epoch": 0.44, + "learning_rate": 5.338558785750756e-06, + "loss": 1.0291, + "step": 2097 + }, + { + "epoch": 0.44, + "learning_rate": 5.3379426884953025e-06, + "loss": 0.9797, + "step": 2098 + }, + { + "epoch": 0.44, + "learning_rate": 5.337326340028074e-06, + "loss": 1.057, + "step": 2099 + }, + { + "epoch": 0.44, + "learning_rate": 5.336709740415298e-06, + "loss": 0.8881, + "step": 2100 + }, + { + "epoch": 0.44, + "learning_rate": 5.336092889723226e-06, + "loss": 1.0142, + "step": 2101 + }, + { + "epoch": 0.44, + "learning_rate": 5.33547578801814e-06, + "loss": 0.6481, + "step": 2102 + }, + { + "epoch": 0.44, + "learning_rate": 5.334858435366347e-06, + "loss": 0.9247, + "step": 2103 + }, + { + "epoch": 0.44, + "learning_rate": 5.334240831834182e-06, + "loss": 1.1453, + "step": 2104 + }, + { + "epoch": 0.44, + "learning_rate": 5.333622977488007e-06, + "loss": 1.0225, + "step": 2105 + }, + { + "epoch": 0.44, + "learning_rate": 5.333004872394209e-06, + "loss": 1.1377, + "step": 2106 + }, + { + "epoch": 0.44, + "learning_rate": 5.332386516619203e-06, + "loss": 0.8496, + "step": 2107 + }, + { + "epoch": 0.44, + "learning_rate": 5.331767910229433e-06, + "loss": 1.0057, + "step": 2108 + }, + { + "epoch": 0.44, + "learning_rate": 5.331149053291367e-06, + "loss": 0.9775, + "step": 2109 + }, + { + "epoch": 0.44, + "learning_rate": 5.330529945871503e-06, + "loss": 1.0833, + "step": 2110 + }, + { + "epoch": 0.44, + "learning_rate": 5.329910588036361e-06, + "loss": 1.1037, + "step": 2111 + }, + { + "epoch": 0.44, + "learning_rate": 5.329290979852494e-06, + "loss": 0.9516, + "step": 2112 + }, + { + "epoch": 0.44, + "learning_rate": 5.328671121386477e-06, + "loss": 0.8411, + "step": 2113 + }, + { + "epoch": 0.44, + "learning_rate": 5.328051012704913e-06, + "loss": 0.876, + "step": 2114 + }, + { + "epoch": 0.44, + "learning_rate": 5.327430653874435e-06, + "loss": 1.0085, + "step": 2115 + }, + { + "epoch": 0.44, + "learning_rate": 5.326810044961699e-06, + "loss": 0.9423, + "step": 2116 + }, + { + "epoch": 0.44, + "learning_rate": 5.3261891860333895e-06, + "loss": 0.9133, + "step": 2117 + }, + { + "epoch": 0.44, + "learning_rate": 5.325568077156218e-06, + "loss": 0.7395, + "step": 2118 + }, + { + "epoch": 0.44, + "learning_rate": 5.324946718396922e-06, + "loss": 0.8559, + "step": 2119 + }, + { + "epoch": 0.44, + "learning_rate": 5.324325109822269e-06, + "loss": 0.8962, + "step": 2120 + }, + { + "epoch": 0.44, + "learning_rate": 5.323703251499047e-06, + "loss": 0.9687, + "step": 2121 + }, + { + "epoch": 0.44, + "learning_rate": 5.323081143494077e-06, + "loss": 0.9476, + "step": 2122 + }, + { + "epoch": 0.44, + "learning_rate": 5.322458785874203e-06, + "loss": 0.9525, + "step": 2123 + }, + { + "epoch": 0.44, + "learning_rate": 5.3218361787062994e-06, + "loss": 1.0954, + "step": 2124 + }, + { + "epoch": 0.44, + "learning_rate": 5.321213322057264e-06, + "loss": 0.8106, + "step": 2125 + }, + { + "epoch": 0.44, + "learning_rate": 5.320590215994022e-06, + "loss": 0.7288, + "step": 2126 + }, + { + "epoch": 0.44, + "learning_rate": 5.319966860583528e-06, + "loss": 1.0748, + "step": 2127 + }, + { + "epoch": 0.44, + "learning_rate": 5.319343255892759e-06, + "loss": 1.0312, + "step": 2128 + }, + { + "epoch": 0.44, + "learning_rate": 5.318719401988724e-06, + "loss": 0.9833, + "step": 2129 + }, + { + "epoch": 0.44, + "learning_rate": 5.318095298938454e-06, + "loss": 0.9763, + "step": 2130 + }, + { + "epoch": 0.44, + "learning_rate": 5.317470946809009e-06, + "loss": 0.9666, + "step": 2131 + }, + { + "epoch": 0.44, + "learning_rate": 5.3168463456674776e-06, + "loss": 0.8606, + "step": 2132 + }, + { + "epoch": 0.44, + "learning_rate": 5.31622149558097e-06, + "loss": 1.0479, + "step": 2133 + }, + { + "epoch": 0.44, + "learning_rate": 5.3155963966166294e-06, + "loss": 0.9231, + "step": 2134 + }, + { + "epoch": 0.44, + "learning_rate": 5.314971048841621e-06, + "loss": 0.8621, + "step": 2135 + }, + { + "epoch": 0.44, + "learning_rate": 5.314345452323137e-06, + "loss": 0.8674, + "step": 2136 + }, + { + "epoch": 0.44, + "learning_rate": 5.313719607128401e-06, + "loss": 1.0201, + "step": 2137 + }, + { + "epoch": 0.44, + "learning_rate": 5.313093513324658e-06, + "loss": 1.007, + "step": 2138 + }, + { + "epoch": 0.44, + "learning_rate": 5.3124671709791825e-06, + "loss": 0.9265, + "step": 2139 + }, + { + "epoch": 0.45, + "learning_rate": 5.311840580159274e-06, + "loss": 1.0341, + "step": 2140 + }, + { + "epoch": 0.45, + "learning_rate": 5.31121374093226e-06, + "loss": 1.0586, + "step": 2141 + }, + { + "epoch": 0.45, + "learning_rate": 5.310586653365495e-06, + "loss": 1.0631, + "step": 2142 + }, + { + "epoch": 0.45, + "learning_rate": 5.309959317526359e-06, + "loss": 0.9898, + "step": 2143 + }, + { + "epoch": 0.45, + "learning_rate": 5.309331733482257e-06, + "loss": 1.161, + "step": 2144 + }, + { + "epoch": 0.45, + "learning_rate": 5.308703901300627e-06, + "loss": 0.817, + "step": 2145 + }, + { + "epoch": 0.45, + "learning_rate": 5.3080758210489275e-06, + "loss": 0.8678, + "step": 2146 + }, + { + "epoch": 0.45, + "learning_rate": 5.307447492794646e-06, + "loss": 1.0541, + "step": 2147 + }, + { + "epoch": 0.45, + "learning_rate": 5.306818916605295e-06, + "loss": 0.9602, + "step": 2148 + }, + { + "epoch": 0.45, + "learning_rate": 5.306190092548417e-06, + "loss": 0.9767, + "step": 2149 + }, + { + "epoch": 0.45, + "learning_rate": 5.305561020691578e-06, + "loss": 0.8718, + "step": 2150 + }, + { + "epoch": 0.45, + "learning_rate": 5.304931701102371e-06, + "loss": 0.9833, + "step": 2151 + }, + { + "epoch": 0.45, + "learning_rate": 5.304302133848418e-06, + "loss": 0.8651, + "step": 2152 + }, + { + "epoch": 0.45, + "learning_rate": 5.303672318997365e-06, + "loss": 0.9819, + "step": 2153 + }, + { + "epoch": 0.45, + "learning_rate": 5.3030422566168855e-06, + "loss": 0.9429, + "step": 2154 + }, + { + "epoch": 0.45, + "learning_rate": 5.3024119467746796e-06, + "loss": 1.2254, + "step": 2155 + }, + { + "epoch": 0.45, + "learning_rate": 5.301781389538475e-06, + "loss": 0.852, + "step": 2156 + }, + { + "epoch": 0.45, + "learning_rate": 5.301150584976024e-06, + "loss": 1.1446, + "step": 2157 + }, + { + "epoch": 0.45, + "learning_rate": 5.300519533155107e-06, + "loss": 0.9005, + "step": 2158 + }, + { + "epoch": 0.45, + "learning_rate": 5.29988823414353e-06, + "loss": 0.8197, + "step": 2159 + }, + { + "epoch": 0.45, + "learning_rate": 5.299256688009127e-06, + "loss": 0.9281, + "step": 2160 + }, + { + "epoch": 0.45, + "learning_rate": 5.298624894819755e-06, + "loss": 0.952, + "step": 2161 + }, + { + "epoch": 0.45, + "learning_rate": 5.297992854643304e-06, + "loss": 1.0952, + "step": 2162 + }, + { + "epoch": 0.45, + "learning_rate": 5.297360567547685e-06, + "loss": 1.0235, + "step": 2163 + }, + { + "epoch": 0.45, + "learning_rate": 5.296728033600836e-06, + "loss": 0.9051, + "step": 2164 + }, + { + "epoch": 0.45, + "learning_rate": 5.296095252870724e-06, + "loss": 1.0226, + "step": 2165 + }, + { + "epoch": 0.45, + "learning_rate": 5.295462225425341e-06, + "loss": 1.1001, + "step": 2166 + }, + { + "epoch": 0.45, + "learning_rate": 5.2948289513327054e-06, + "loss": 1.2066, + "step": 2167 + }, + { + "epoch": 0.45, + "learning_rate": 5.2941954306608625e-06, + "loss": 1.0407, + "step": 2168 + }, + { + "epoch": 0.45, + "learning_rate": 5.293561663477885e-06, + "loss": 0.9159, + "step": 2169 + }, + { + "epoch": 0.45, + "learning_rate": 5.29292764985187e-06, + "loss": 1.195, + "step": 2170 + }, + { + "epoch": 0.45, + "learning_rate": 5.292293389850943e-06, + "loss": 0.9187, + "step": 2171 + }, + { + "epoch": 0.45, + "learning_rate": 5.2916588835432544e-06, + "loss": 1.0759, + "step": 2172 + }, + { + "epoch": 0.45, + "learning_rate": 5.291024130996983e-06, + "loss": 1.1, + "step": 2173 + }, + { + "epoch": 0.45, + "learning_rate": 5.290389132280332e-06, + "loss": 1.042, + "step": 2174 + }, + { + "epoch": 0.45, + "learning_rate": 5.289753887461531e-06, + "loss": 1.0089, + "step": 2175 + }, + { + "epoch": 0.45, + "learning_rate": 5.28911839660884e-06, + "loss": 1.1133, + "step": 2176 + }, + { + "epoch": 0.45, + "learning_rate": 5.2884826597905385e-06, + "loss": 1.0183, + "step": 2177 + }, + { + "epoch": 0.45, + "learning_rate": 5.28784667707494e-06, + "loss": 0.9107, + "step": 2178 + }, + { + "epoch": 0.45, + "learning_rate": 5.287210448530379e-06, + "loss": 1.2817, + "step": 2179 + }, + { + "epoch": 0.45, + "learning_rate": 5.286573974225218e-06, + "loss": 0.8715, + "step": 2180 + }, + { + "epoch": 0.45, + "learning_rate": 5.285937254227847e-06, + "loss": 1.1864, + "step": 2181 + }, + { + "epoch": 0.45, + "learning_rate": 5.2853002886066825e-06, + "loss": 0.8947, + "step": 2182 + }, + { + "epoch": 0.45, + "learning_rate": 5.284663077430163e-06, + "loss": 1.0336, + "step": 2183 + }, + { + "epoch": 0.45, + "learning_rate": 5.2840256207667595e-06, + "loss": 0.9945, + "step": 2184 + }, + { + "epoch": 0.45, + "learning_rate": 5.2833879186849664e-06, + "loss": 0.8628, + "step": 2185 + }, + { + "epoch": 0.45, + "learning_rate": 5.282749971253305e-06, + "loss": 0.9275, + "step": 2186 + }, + { + "epoch": 0.45, + "learning_rate": 5.282111778540321e-06, + "loss": 0.8971, + "step": 2187 + }, + { + "epoch": 0.46, + "learning_rate": 5.28147334061459e-06, + "loss": 0.8883, + "step": 2188 + }, + { + "epoch": 0.46, + "learning_rate": 5.28083465754471e-06, + "loss": 0.8309, + "step": 2189 + }, + { + "epoch": 0.46, + "learning_rate": 5.280195729399311e-06, + "loss": 0.8338, + "step": 2190 + }, + { + "epoch": 0.46, + "learning_rate": 5.279556556247042e-06, + "loss": 1.0008, + "step": 2191 + }, + { + "epoch": 0.46, + "learning_rate": 5.278917138156584e-06, + "loss": 1.1495, + "step": 2192 + }, + { + "epoch": 0.46, + "learning_rate": 5.278277475196642e-06, + "loss": 0.8336, + "step": 2193 + }, + { + "epoch": 0.46, + "learning_rate": 5.277637567435947e-06, + "loss": 1.0254, + "step": 2194 + }, + { + "epoch": 0.46, + "learning_rate": 5.276997414943259e-06, + "loss": 0.9133, + "step": 2195 + }, + { + "epoch": 0.46, + "learning_rate": 5.276357017787361e-06, + "loss": 1.0833, + "step": 2196 + }, + { + "epoch": 0.46, + "learning_rate": 5.275716376037065e-06, + "loss": 0.909, + "step": 2197 + }, + { + "epoch": 0.46, + "learning_rate": 5.275075489761205e-06, + "loss": 0.865, + "step": 2198 + }, + { + "epoch": 0.46, + "learning_rate": 5.274434359028647e-06, + "loss": 0.8087, + "step": 2199 + }, + { + "epoch": 0.46, + "learning_rate": 5.273792983908279e-06, + "loss": 0.9862, + "step": 2200 + }, + { + "epoch": 0.46, + "learning_rate": 5.273151364469017e-06, + "loss": 1.0059, + "step": 2201 + }, + { + "epoch": 0.46, + "learning_rate": 5.272509500779804e-06, + "loss": 0.9221, + "step": 2202 + }, + { + "epoch": 0.46, + "learning_rate": 5.271867392909606e-06, + "loss": 0.9621, + "step": 2203 + }, + { + "epoch": 0.46, + "learning_rate": 5.27122504092742e-06, + "loss": 0.9615, + "step": 2204 + }, + { + "epoch": 0.46, + "learning_rate": 5.270582444902265e-06, + "loss": 0.8818, + "step": 2205 + }, + { + "epoch": 0.46, + "learning_rate": 5.269939604903188e-06, + "loss": 1.0407, + "step": 2206 + }, + { + "epoch": 0.46, + "learning_rate": 5.269296520999263e-06, + "loss": 0.894, + "step": 2207 + }, + { + "epoch": 0.46, + "learning_rate": 5.2686531932595885e-06, + "loss": 0.9077, + "step": 2208 + }, + { + "epoch": 0.46, + "learning_rate": 5.2680096217532896e-06, + "loss": 0.9945, + "step": 2209 + }, + { + "epoch": 0.46, + "learning_rate": 5.267365806549519e-06, + "loss": 0.8381, + "step": 2210 + }, + { + "epoch": 0.46, + "learning_rate": 5.266721747717455e-06, + "loss": 0.914, + "step": 2211 + }, + { + "epoch": 0.46, + "learning_rate": 5.266077445326301e-06, + "loss": 1.0046, + "step": 2212 + }, + { + "epoch": 0.46, + "learning_rate": 5.2654328994452865e-06, + "loss": 0.8895, + "step": 2213 + }, + { + "epoch": 0.46, + "learning_rate": 5.264788110143668e-06, + "loss": 1.1502, + "step": 2214 + }, + { + "epoch": 0.46, + "learning_rate": 5.26414307749073e-06, + "loss": 0.9952, + "step": 2215 + }, + { + "epoch": 0.46, + "learning_rate": 5.2634978015557795e-06, + "loss": 0.9824, + "step": 2216 + }, + { + "epoch": 0.46, + "learning_rate": 5.262852282408152e-06, + "loss": 1.0216, + "step": 2217 + }, + { + "epoch": 0.46, + "learning_rate": 5.2622065201172075e-06, + "loss": 0.9112, + "step": 2218 + }, + { + "epoch": 0.46, + "learning_rate": 5.261560514752334e-06, + "loss": 0.9438, + "step": 2219 + }, + { + "epoch": 0.46, + "learning_rate": 5.260914266382945e-06, + "loss": 0.9328, + "step": 2220 + }, + { + "epoch": 0.46, + "learning_rate": 5.260267775078479e-06, + "loss": 0.8888, + "step": 2221 + }, + { + "epoch": 0.46, + "learning_rate": 5.259621040908402e-06, + "loss": 0.7303, + "step": 2222 + }, + { + "epoch": 0.46, + "learning_rate": 5.258974063942206e-06, + "loss": 0.877, + "step": 2223 + }, + { + "epoch": 0.46, + "learning_rate": 5.258326844249407e-06, + "loss": 0.9019, + "step": 2224 + }, + { + "epoch": 0.46, + "learning_rate": 5.25767938189955e-06, + "loss": 0.9131, + "step": 2225 + }, + { + "epoch": 0.46, + "learning_rate": 5.257031676962205e-06, + "loss": 0.8729, + "step": 2226 + }, + { + "epoch": 0.46, + "learning_rate": 5.256383729506967e-06, + "loss": 1.0595, + "step": 2227 + }, + { + "epoch": 0.46, + "learning_rate": 5.255735539603458e-06, + "loss": 0.9507, + "step": 2228 + }, + { + "epoch": 0.46, + "learning_rate": 5.255087107321327e-06, + "loss": 0.9108, + "step": 2229 + }, + { + "epoch": 0.46, + "learning_rate": 5.254438432730246e-06, + "loss": 0.7734, + "step": 2230 + }, + { + "epoch": 0.46, + "learning_rate": 5.253789515899918e-06, + "loss": 0.9291, + "step": 2231 + }, + { + "epoch": 0.46, + "learning_rate": 5.2531403569000665e-06, + "loss": 1.0132, + "step": 2232 + }, + { + "epoch": 0.46, + "learning_rate": 5.252490955800445e-06, + "loss": 0.9676, + "step": 2233 + }, + { + "epoch": 0.46, + "learning_rate": 5.25184131267083e-06, + "loss": 0.9758, + "step": 2234 + }, + { + "epoch": 0.46, + "learning_rate": 5.251191427581027e-06, + "loss": 1.0558, + "step": 2235 + }, + { + "epoch": 0.47, + "learning_rate": 5.250541300600865e-06, + "loss": 0.749, + "step": 2236 + }, + { + "epoch": 0.47, + "learning_rate": 5.249890931800202e-06, + "loss": 1.1418, + "step": 2237 + }, + { + "epoch": 0.47, + "learning_rate": 5.249240321248919e-06, + "loss": 0.9666, + "step": 2238 + }, + { + "epoch": 0.47, + "learning_rate": 5.248589469016923e-06, + "loss": 1.1441, + "step": 2239 + }, + { + "epoch": 0.47, + "learning_rate": 5.24793837517415e-06, + "loss": 0.8825, + "step": 2240 + }, + { + "epoch": 0.47, + "learning_rate": 5.2472870397905585e-06, + "loss": 1.1467, + "step": 2241 + }, + { + "epoch": 0.47, + "learning_rate": 5.246635462936135e-06, + "loss": 0.8301, + "step": 2242 + }, + { + "epoch": 0.47, + "learning_rate": 5.245983644680891e-06, + "loss": 0.763, + "step": 2243 + }, + { + "epoch": 0.47, + "learning_rate": 5.245331585094865e-06, + "loss": 1.0763, + "step": 2244 + }, + { + "epoch": 0.47, + "learning_rate": 5.244679284248121e-06, + "loss": 0.9151, + "step": 2245 + }, + { + "epoch": 0.47, + "learning_rate": 5.244026742210747e-06, + "loss": 0.8194, + "step": 2246 + }, + { + "epoch": 0.47, + "learning_rate": 5.2433739590528605e-06, + "loss": 0.9766, + "step": 2247 + }, + { + "epoch": 0.47, + "learning_rate": 5.242720934844602e-06, + "loss": 0.996, + "step": 2248 + }, + { + "epoch": 0.47, + "learning_rate": 5.2420676696561385e-06, + "loss": 0.8958, + "step": 2249 + }, + { + "epoch": 0.47, + "learning_rate": 5.241414163557664e-06, + "loss": 0.954, + "step": 2250 + }, + { + "epoch": 0.47, + "learning_rate": 5.240760416619398e-06, + "loss": 0.8122, + "step": 2251 + }, + { + "epoch": 0.47, + "learning_rate": 5.240106428911585e-06, + "loss": 0.912, + "step": 2252 + }, + { + "epoch": 0.47, + "learning_rate": 5.239452200504496e-06, + "loss": 1.1593, + "step": 2253 + }, + { + "epoch": 0.47, + "learning_rate": 5.238797731468428e-06, + "loss": 1.0324, + "step": 2254 + }, + { + "epoch": 0.47, + "learning_rate": 5.238143021873703e-06, + "loss": 1.1349, + "step": 2255 + }, + { + "epoch": 0.47, + "learning_rate": 5.237488071790671e-06, + "loss": 0.9285, + "step": 2256 + }, + { + "epoch": 0.47, + "learning_rate": 5.2368328812897045e-06, + "loss": 0.8325, + "step": 2257 + }, + { + "epoch": 0.47, + "learning_rate": 5.236177450441205e-06, + "loss": 1.004, + "step": 2258 + }, + { + "epoch": 0.47, + "learning_rate": 5.235521779315597e-06, + "loss": 0.9767, + "step": 2259 + }, + { + "epoch": 0.47, + "learning_rate": 5.2348658679833345e-06, + "loss": 0.9082, + "step": 2260 + }, + { + "epoch": 0.47, + "learning_rate": 5.234209716514894e-06, + "loss": 1.0185, + "step": 2261 + }, + { + "epoch": 0.47, + "learning_rate": 5.2335533249807784e-06, + "loss": 1.0599, + "step": 2262 + }, + { + "epoch": 0.47, + "learning_rate": 5.232896693451517e-06, + "loss": 0.8633, + "step": 2263 + }, + { + "epoch": 0.47, + "learning_rate": 5.232239821997666e-06, + "loss": 0.8465, + "step": 2264 + }, + { + "epoch": 0.47, + "learning_rate": 5.231582710689805e-06, + "loss": 0.779, + "step": 2265 + }, + { + "epoch": 0.47, + "learning_rate": 5.230925359598541e-06, + "loss": 1.0325, + "step": 2266 + }, + { + "epoch": 0.47, + "learning_rate": 5.230267768794507e-06, + "loss": 1.0861, + "step": 2267 + }, + { + "epoch": 0.47, + "learning_rate": 5.22960993834836e-06, + "loss": 0.9957, + "step": 2268 + }, + { + "epoch": 0.47, + "learning_rate": 5.228951868330784e-06, + "loss": 0.9446, + "step": 2269 + }, + { + "epoch": 0.47, + "learning_rate": 5.228293558812489e-06, + "loss": 0.9172, + "step": 2270 + }, + { + "epoch": 0.47, + "learning_rate": 5.22763500986421e-06, + "loss": 1.1187, + "step": 2271 + }, + { + "epoch": 0.47, + "learning_rate": 5.226976221556708e-06, + "loss": 0.8433, + "step": 2272 + }, + { + "epoch": 0.47, + "learning_rate": 5.22631719396077e-06, + "loss": 0.7467, + "step": 2273 + }, + { + "epoch": 0.47, + "learning_rate": 5.2256579271472086e-06, + "loss": 0.921, + "step": 2274 + }, + { + "epoch": 0.47, + "learning_rate": 5.2249984211868614e-06, + "loss": 0.7918, + "step": 2275 + }, + { + "epoch": 0.47, + "learning_rate": 5.2243386761505925e-06, + "loss": 1.2375, + "step": 2276 + }, + { + "epoch": 0.47, + "learning_rate": 5.223678692109292e-06, + "loss": 0.9202, + "step": 2277 + }, + { + "epoch": 0.47, + "learning_rate": 5.223018469133875e-06, + "loss": 0.9436, + "step": 2278 + }, + { + "epoch": 0.47, + "learning_rate": 5.222358007295282e-06, + "loss": 0.7107, + "step": 2279 + }, + { + "epoch": 0.47, + "learning_rate": 5.221697306664478e-06, + "loss": 1.075, + "step": 2280 + }, + { + "epoch": 0.47, + "learning_rate": 5.221036367312459e-06, + "loss": 0.8818, + "step": 2281 + }, + { + "epoch": 0.47, + "learning_rate": 5.220375189310241e-06, + "loss": 0.9534, + "step": 2282 + }, + { + "epoch": 0.47, + "learning_rate": 5.219713772728866e-06, + "loss": 1.0613, + "step": 2283 + }, + { + "epoch": 0.48, + "learning_rate": 5.219052117639405e-06, + "loss": 0.8267, + "step": 2284 + }, + { + "epoch": 0.48, + "learning_rate": 5.218390224112952e-06, + "loss": 0.9093, + "step": 2285 + }, + { + "epoch": 0.48, + "learning_rate": 5.217728092220628e-06, + "loss": 0.9458, + "step": 2286 + }, + { + "epoch": 0.48, + "learning_rate": 5.217065722033579e-06, + "loss": 0.9036, + "step": 2287 + }, + { + "epoch": 0.48, + "learning_rate": 5.216403113622976e-06, + "loss": 1.1513, + "step": 2288 + }, + { + "epoch": 0.48, + "learning_rate": 5.215740267060017e-06, + "loss": 0.8926, + "step": 2289 + }, + { + "epoch": 0.48, + "learning_rate": 5.2150771824159235e-06, + "loss": 1.1308, + "step": 2290 + }, + { + "epoch": 0.48, + "learning_rate": 5.214413859761946e-06, + "loss": 1.1498, + "step": 2291 + }, + { + "epoch": 0.48, + "learning_rate": 5.213750299169357e-06, + "loss": 0.8312, + "step": 2292 + }, + { + "epoch": 0.48, + "learning_rate": 5.213086500709455e-06, + "loss": 1.0423, + "step": 2293 + }, + { + "epoch": 0.48, + "learning_rate": 5.212422464453568e-06, + "loss": 1.2037, + "step": 2294 + }, + { + "epoch": 0.48, + "learning_rate": 5.211758190473045e-06, + "loss": 1.1828, + "step": 2295 + }, + { + "epoch": 0.48, + "learning_rate": 5.211093678839262e-06, + "loss": 0.9032, + "step": 2296 + }, + { + "epoch": 0.48, + "learning_rate": 5.210428929623622e-06, + "loss": 0.9327, + "step": 2297 + }, + { + "epoch": 0.48, + "learning_rate": 5.209763942897549e-06, + "loss": 0.9639, + "step": 2298 + }, + { + "epoch": 0.48, + "learning_rate": 5.2090987187324985e-06, + "loss": 0.9087, + "step": 2299 + }, + { + "epoch": 0.48, + "learning_rate": 5.2084332571999496e-06, + "loss": 1.2201, + "step": 2300 + }, + { + "epoch": 0.48, + "learning_rate": 5.207767558371404e-06, + "loss": 0.9069, + "step": 2301 + }, + { + "epoch": 0.48, + "learning_rate": 5.207101622318392e-06, + "loss": 1.0056, + "step": 2302 + }, + { + "epoch": 0.48, + "learning_rate": 5.206435449112467e-06, + "loss": 0.9922, + "step": 2303 + }, + { + "epoch": 0.48, + "learning_rate": 5.2057690388252124e-06, + "loss": 0.7829, + "step": 2304 + }, + { + "epoch": 0.48, + "learning_rate": 5.205102391528231e-06, + "loss": 0.8016, + "step": 2305 + }, + { + "epoch": 0.48, + "learning_rate": 5.2044355072931545e-06, + "loss": 1.1343, + "step": 2306 + }, + { + "epoch": 0.48, + "learning_rate": 5.2037683861916416e-06, + "loss": 1.0169, + "step": 2307 + }, + { + "epoch": 0.48, + "learning_rate": 5.2031010282953715e-06, + "loss": 0.9702, + "step": 2308 + }, + { + "epoch": 0.48, + "learning_rate": 5.202433433676053e-06, + "loss": 0.9735, + "step": 2309 + }, + { + "epoch": 0.48, + "learning_rate": 5.201765602405421e-06, + "loss": 1.0213, + "step": 2310 + }, + { + "epoch": 0.48, + "learning_rate": 5.20109753455523e-06, + "loss": 1.0009, + "step": 2311 + }, + { + "epoch": 0.48, + "learning_rate": 5.200429230197267e-06, + "loss": 1.0114, + "step": 2312 + }, + { + "epoch": 0.48, + "learning_rate": 5.19976068940334e-06, + "loss": 1.1238, + "step": 2313 + }, + { + "epoch": 0.48, + "learning_rate": 5.199091912245285e-06, + "loss": 1.1496, + "step": 2314 + }, + { + "epoch": 0.48, + "learning_rate": 5.198422898794959e-06, + "loss": 0.9576, + "step": 2315 + }, + { + "epoch": 0.48, + "learning_rate": 5.19775364912425e-06, + "loss": 0.9875, + "step": 2316 + }, + { + "epoch": 0.48, + "learning_rate": 5.197084163305068e-06, + "loss": 1.0115, + "step": 2317 + }, + { + "epoch": 0.48, + "learning_rate": 5.196414441409351e-06, + "loss": 0.924, + "step": 2318 + }, + { + "epoch": 0.48, + "learning_rate": 5.195744483509058e-06, + "loss": 0.854, + "step": 2319 + }, + { + "epoch": 0.48, + "learning_rate": 5.195074289676177e-06, + "loss": 0.9339, + "step": 2320 + }, + { + "epoch": 0.48, + "learning_rate": 5.1944038599827195e-06, + "loss": 0.9085, + "step": 2321 + }, + { + "epoch": 0.48, + "learning_rate": 5.193733194500725e-06, + "loss": 0.8463, + "step": 2322 + }, + { + "epoch": 0.48, + "learning_rate": 5.193062293302255e-06, + "loss": 1.0058, + "step": 2323 + }, + { + "epoch": 0.48, + "learning_rate": 5.192391156459397e-06, + "loss": 1.144, + "step": 2324 + }, + { + "epoch": 0.48, + "learning_rate": 5.191719784044267e-06, + "loss": 0.8709, + "step": 2325 + }, + { + "epoch": 0.48, + "learning_rate": 5.1910481761290025e-06, + "loss": 0.9252, + "step": 2326 + }, + { + "epoch": 0.48, + "learning_rate": 5.1903763327857675e-06, + "loss": 0.8354, + "step": 2327 + }, + { + "epoch": 0.48, + "learning_rate": 5.1897042540867514e-06, + "loss": 0.9169, + "step": 2328 + }, + { + "epoch": 0.48, + "learning_rate": 5.18903194010417e-06, + "loss": 0.846, + "step": 2329 + }, + { + "epoch": 0.48, + "learning_rate": 5.188359390910263e-06, + "loss": 0.8188, + "step": 2330 + }, + { + "epoch": 0.48, + "learning_rate": 5.187686606577295e-06, + "loss": 1.1429, + "step": 2331 + }, + { + "epoch": 0.49, + "learning_rate": 5.1870135871775575e-06, + "loss": 0.865, + "step": 2332 + }, + { + "epoch": 0.49, + "learning_rate": 5.1863403327833665e-06, + "loss": 0.9829, + "step": 2333 + }, + { + "epoch": 0.49, + "learning_rate": 5.185666843467062e-06, + "loss": 0.9533, + "step": 2334 + }, + { + "epoch": 0.49, + "learning_rate": 5.184993119301011e-06, + "loss": 0.7915, + "step": 2335 + }, + { + "epoch": 0.49, + "learning_rate": 5.184319160357606e-06, + "loss": 1.0291, + "step": 2336 + }, + { + "epoch": 0.49, + "learning_rate": 5.183644966709263e-06, + "loss": 0.8566, + "step": 2337 + }, + { + "epoch": 0.49, + "learning_rate": 5.182970538428424e-06, + "loss": 0.9398, + "step": 2338 + }, + { + "epoch": 0.49, + "learning_rate": 5.182295875587557e-06, + "loss": 1.0052, + "step": 2339 + }, + { + "epoch": 0.49, + "learning_rate": 5.1816209782591524e-06, + "loss": 0.9298, + "step": 2340 + }, + { + "epoch": 0.49, + "learning_rate": 5.180945846515731e-06, + "loss": 0.7951, + "step": 2341 + }, + { + "epoch": 0.49, + "learning_rate": 5.1802704804298335e-06, + "loss": 0.794, + "step": 2342 + }, + { + "epoch": 0.49, + "learning_rate": 5.179594880074028e-06, + "loss": 1.0035, + "step": 2343 + }, + { + "epoch": 0.49, + "learning_rate": 5.178919045520908e-06, + "loss": 1.1776, + "step": 2344 + }, + { + "epoch": 0.49, + "learning_rate": 5.178242976843093e-06, + "loss": 0.8599, + "step": 2345 + }, + { + "epoch": 0.49, + "learning_rate": 5.177566674113226e-06, + "loss": 1.2292, + "step": 2346 + }, + { + "epoch": 0.49, + "learning_rate": 5.176890137403973e-06, + "loss": 0.959, + "step": 2347 + }, + { + "epoch": 0.49, + "learning_rate": 5.176213366788031e-06, + "loss": 0.9405, + "step": 2348 + }, + { + "epoch": 0.49, + "learning_rate": 5.175536362338119e-06, + "loss": 1.0396, + "step": 2349 + }, + { + "epoch": 0.49, + "learning_rate": 5.17485912412698e-06, + "loss": 0.8388, + "step": 2350 + }, + { + "epoch": 0.49, + "learning_rate": 5.174181652227383e-06, + "loss": 0.8505, + "step": 2351 + }, + { + "epoch": 0.49, + "learning_rate": 5.173503946712122e-06, + "loss": 1.1923, + "step": 2352 + }, + { + "epoch": 0.49, + "learning_rate": 5.172826007654018e-06, + "loss": 0.8992, + "step": 2353 + }, + { + "epoch": 0.49, + "learning_rate": 5.172147835125914e-06, + "loss": 0.7107, + "step": 2354 + }, + { + "epoch": 0.49, + "learning_rate": 5.17146942920068e-06, + "loss": 0.835, + "step": 2355 + }, + { + "epoch": 0.49, + "learning_rate": 5.1707907899512104e-06, + "loss": 0.9372, + "step": 2356 + }, + { + "epoch": 0.49, + "learning_rate": 5.1701119174504265e-06, + "loss": 0.8694, + "step": 2357 + }, + { + "epoch": 0.49, + "learning_rate": 5.169432811771271e-06, + "loss": 1.0199, + "step": 2358 + }, + { + "epoch": 0.49, + "learning_rate": 5.168753472986715e-06, + "loss": 0.8876, + "step": 2359 + }, + { + "epoch": 0.49, + "learning_rate": 5.168073901169753e-06, + "loss": 0.9634, + "step": 2360 + }, + { + "epoch": 0.49, + "learning_rate": 5.167394096393405e-06, + "loss": 0.885, + "step": 2361 + }, + { + "epoch": 0.49, + "learning_rate": 5.166714058730716e-06, + "loss": 1.007, + "step": 2362 + }, + { + "epoch": 0.49, + "learning_rate": 5.1660337882547565e-06, + "loss": 0.9285, + "step": 2363 + }, + { + "epoch": 0.49, + "learning_rate": 5.165353285038619e-06, + "loss": 1.0332, + "step": 2364 + }, + { + "epoch": 0.49, + "learning_rate": 5.164672549155428e-06, + "loss": 1.0712, + "step": 2365 + }, + { + "epoch": 0.49, + "learning_rate": 5.1639915806783256e-06, + "loss": 0.8825, + "step": 2366 + }, + { + "epoch": 0.49, + "learning_rate": 5.1633103796804815e-06, + "loss": 0.83, + "step": 2367 + }, + { + "epoch": 0.49, + "learning_rate": 5.1626289462350916e-06, + "loss": 0.9619, + "step": 2368 + }, + { + "epoch": 0.49, + "learning_rate": 5.161947280415376e-06, + "loss": 1.074, + "step": 2369 + }, + { + "epoch": 0.49, + "learning_rate": 5.1612653822945795e-06, + "loss": 1.0992, + "step": 2370 + }, + { + "epoch": 0.49, + "learning_rate": 5.160583251945972e-06, + "loss": 1.1627, + "step": 2371 + }, + { + "epoch": 0.49, + "learning_rate": 5.159900889442848e-06, + "loss": 1.0249, + "step": 2372 + }, + { + "epoch": 0.49, + "learning_rate": 5.159218294858527e-06, + "loss": 0.8855, + "step": 2373 + }, + { + "epoch": 0.49, + "learning_rate": 5.158535468266356e-06, + "loss": 0.7929, + "step": 2374 + }, + { + "epoch": 0.49, + "learning_rate": 5.157852409739701e-06, + "loss": 0.868, + "step": 2375 + }, + { + "epoch": 0.49, + "learning_rate": 5.15716911935196e-06, + "loss": 0.7969, + "step": 2376 + }, + { + "epoch": 0.49, + "learning_rate": 5.15648559717655e-06, + "loss": 1.0913, + "step": 2377 + }, + { + "epoch": 0.49, + "learning_rate": 5.155801843286917e-06, + "loss": 0.7859, + "step": 2378 + }, + { + "epoch": 0.49, + "learning_rate": 5.15511785775653e-06, + "loss": 0.8983, + "step": 2379 + }, + { + "epoch": 0.5, + "learning_rate": 5.1544336406588826e-06, + "loss": 1.0068, + "step": 2380 + }, + { + "epoch": 0.5, + "learning_rate": 5.153749192067494e-06, + "loss": 0.9066, + "step": 2381 + }, + { + "epoch": 0.5, + "learning_rate": 5.153064512055909e-06, + "loss": 1.0031, + "step": 2382 + }, + { + "epoch": 0.5, + "learning_rate": 5.152379600697696e-06, + "loss": 0.9813, + "step": 2383 + }, + { + "epoch": 0.5, + "learning_rate": 5.151694458066447e-06, + "loss": 0.9333, + "step": 2384 + }, + { + "epoch": 0.5, + "learning_rate": 5.151009084235783e-06, + "loss": 1.0152, + "step": 2385 + }, + { + "epoch": 0.5, + "learning_rate": 5.150323479279347e-06, + "loss": 0.9322, + "step": 2386 + }, + { + "epoch": 0.5, + "learning_rate": 5.149637643270807e-06, + "loss": 1.0654, + "step": 2387 + }, + { + "epoch": 0.5, + "learning_rate": 5.148951576283853e-06, + "loss": 0.817, + "step": 2388 + }, + { + "epoch": 0.5, + "learning_rate": 5.1482652783922075e-06, + "loss": 1.1025, + "step": 2389 + }, + { + "epoch": 0.5, + "learning_rate": 5.14757874966961e-06, + "loss": 1.1547, + "step": 2390 + }, + { + "epoch": 0.5, + "learning_rate": 5.14689199018983e-06, + "loss": 0.964, + "step": 2391 + }, + { + "epoch": 0.5, + "learning_rate": 5.14620500002666e-06, + "loss": 0.862, + "step": 2392 + }, + { + "epoch": 0.5, + "learning_rate": 5.145517779253914e-06, + "loss": 1.0674, + "step": 2393 + }, + { + "epoch": 0.5, + "learning_rate": 5.144830327945436e-06, + "loss": 0.9691, + "step": 2394 + }, + { + "epoch": 0.5, + "learning_rate": 5.144142646175093e-06, + "loss": 1.0154, + "step": 2395 + }, + { + "epoch": 0.5, + "learning_rate": 5.143454734016775e-06, + "loss": 0.938, + "step": 2396 + }, + { + "epoch": 0.5, + "learning_rate": 5.1427665915444e-06, + "loss": 1.0227, + "step": 2397 + }, + { + "epoch": 0.5, + "learning_rate": 5.1420782188319065e-06, + "loss": 0.9142, + "step": 2398 + }, + { + "epoch": 0.5, + "learning_rate": 5.141389615953261e-06, + "loss": 0.9136, + "step": 2399 + }, + { + "epoch": 0.5, + "learning_rate": 5.140700782982454e-06, + "loss": 1.0439, + "step": 2400 + }, + { + "epoch": 0.5, + "learning_rate": 5.140011719993502e-06, + "loss": 0.876, + "step": 2401 + }, + { + "epoch": 0.5, + "learning_rate": 5.139322427060442e-06, + "loss": 0.931, + "step": 2402 + }, + { + "epoch": 0.5, + "learning_rate": 5.138632904257341e-06, + "loss": 0.9063, + "step": 2403 + }, + { + "epoch": 0.5, + "learning_rate": 5.137943151658287e-06, + "loss": 1.0587, + "step": 2404 + }, + { + "epoch": 0.5, + "learning_rate": 5.1372531693373925e-06, + "loss": 0.9774, + "step": 2405 + }, + { + "epoch": 0.5, + "learning_rate": 5.1365629573687984e-06, + "loss": 0.7927, + "step": 2406 + }, + { + "epoch": 0.5, + "learning_rate": 5.135872515826667e-06, + "loss": 1.0598, + "step": 2407 + }, + { + "epoch": 0.5, + "learning_rate": 5.135181844785186e-06, + "loss": 0.7907, + "step": 2408 + }, + { + "epoch": 0.5, + "learning_rate": 5.134490944318569e-06, + "loss": 1.0296, + "step": 2409 + }, + { + "epoch": 0.5, + "learning_rate": 5.133799814501052e-06, + "loss": 0.9564, + "step": 2410 + }, + { + "epoch": 0.5, + "learning_rate": 5.133108455406897e-06, + "loss": 0.7194, + "step": 2411 + }, + { + "epoch": 0.5, + "learning_rate": 5.132416867110389e-06, + "loss": 1.1497, + "step": 2412 + }, + { + "epoch": 0.5, + "learning_rate": 5.131725049685843e-06, + "loss": 0.872, + "step": 2413 + }, + { + "epoch": 0.5, + "learning_rate": 5.131033003207592e-06, + "loss": 0.8752, + "step": 2414 + }, + { + "epoch": 0.5, + "learning_rate": 5.130340727749996e-06, + "loss": 0.8921, + "step": 2415 + }, + { + "epoch": 0.5, + "learning_rate": 5.129648223387442e-06, + "loss": 0.917, + "step": 2416 + }, + { + "epoch": 0.5, + "learning_rate": 5.128955490194338e-06, + "loss": 1.1003, + "step": 2417 + }, + { + "epoch": 0.5, + "learning_rate": 5.128262528245118e-06, + "loss": 1.0014, + "step": 2418 + }, + { + "epoch": 0.5, + "learning_rate": 5.127569337614242e-06, + "loss": 0.9083, + "step": 2419 + }, + { + "epoch": 0.5, + "learning_rate": 5.126875918376192e-06, + "loss": 0.9644, + "step": 2420 + }, + { + "epoch": 0.5, + "learning_rate": 5.126182270605476e-06, + "loss": 0.8377, + "step": 2421 + }, + { + "epoch": 0.5, + "learning_rate": 5.125488394376628e-06, + "loss": 0.8545, + "step": 2422 + }, + { + "epoch": 0.5, + "learning_rate": 5.124794289764202e-06, + "loss": 1.0247, + "step": 2423 + }, + { + "epoch": 0.5, + "learning_rate": 5.124099956842783e-06, + "loss": 1.0255, + "step": 2424 + }, + { + "epoch": 0.5, + "learning_rate": 5.123405395686974e-06, + "loss": 1.012, + "step": 2425 + }, + { + "epoch": 0.5, + "learning_rate": 5.122710606371407e-06, + "loss": 0.9471, + "step": 2426 + }, + { + "epoch": 0.5, + "learning_rate": 5.122015588970737e-06, + "loss": 0.7059, + "step": 2427 + }, + { + "epoch": 0.5, + "learning_rate": 5.1213203435596425e-06, + "loss": 0.8125, + "step": 2428 + }, + { + "epoch": 0.51, + "learning_rate": 5.120624870212829e-06, + "loss": 0.8683, + "step": 2429 + }, + { + "epoch": 0.51, + "learning_rate": 5.119929169005026e-06, + "loss": 1.3407, + "step": 2430 + }, + { + "epoch": 0.51, + "learning_rate": 5.119233240010982e-06, + "loss": 0.845, + "step": 2431 + }, + { + "epoch": 0.51, + "learning_rate": 5.11853708330548e-06, + "loss": 0.9113, + "step": 2432 + }, + { + "epoch": 0.51, + "learning_rate": 5.117840698963319e-06, + "loss": 0.8465, + "step": 2433 + }, + { + "epoch": 0.51, + "learning_rate": 5.117144087059327e-06, + "loss": 1.1675, + "step": 2434 + }, + { + "epoch": 0.51, + "learning_rate": 5.116447247668352e-06, + "loss": 0.9437, + "step": 2435 + }, + { + "epoch": 0.51, + "learning_rate": 5.1157501808652724e-06, + "loss": 0.803, + "step": 2436 + }, + { + "epoch": 0.51, + "learning_rate": 5.1150528867249864e-06, + "loss": 0.8941, + "step": 2437 + }, + { + "epoch": 0.51, + "learning_rate": 5.114355365322418e-06, + "loss": 1.2692, + "step": 2438 + }, + { + "epoch": 0.51, + "learning_rate": 5.113657616732518e-06, + "loss": 1.0514, + "step": 2439 + }, + { + "epoch": 0.51, + "learning_rate": 5.112959641030257e-06, + "loss": 0.8576, + "step": 2440 + }, + { + "epoch": 0.51, + "learning_rate": 5.112261438290633e-06, + "loss": 1.0214, + "step": 2441 + }, + { + "epoch": 0.51, + "learning_rate": 5.111563008588669e-06, + "loss": 1.0628, + "step": 2442 + }, + { + "epoch": 0.51, + "learning_rate": 5.11086435199941e-06, + "loss": 1.007, + "step": 2443 + }, + { + "epoch": 0.51, + "learning_rate": 5.1101654685979285e-06, + "loss": 0.8617, + "step": 2444 + }, + { + "epoch": 0.51, + "learning_rate": 5.109466358459316e-06, + "loss": 1.0714, + "step": 2445 + }, + { + "epoch": 0.51, + "learning_rate": 5.108767021658695e-06, + "loss": 1.1484, + "step": 2446 + }, + { + "epoch": 0.51, + "learning_rate": 5.108067458271207e-06, + "loss": 1.0369, + "step": 2447 + }, + { + "epoch": 0.51, + "learning_rate": 5.1073676683720216e-06, + "loss": 0.9214, + "step": 2448 + }, + { + "epoch": 0.51, + "learning_rate": 5.106667652036331e-06, + "loss": 1.0162, + "step": 2449 + }, + { + "epoch": 0.51, + "learning_rate": 5.105967409339351e-06, + "loss": 1.0225, + "step": 2450 + }, + { + "epoch": 0.51, + "learning_rate": 5.105266940356322e-06, + "loss": 0.9784, + "step": 2451 + }, + { + "epoch": 0.51, + "learning_rate": 5.104566245162512e-06, + "loss": 0.8025, + "step": 2452 + }, + { + "epoch": 0.51, + "learning_rate": 5.103865323833209e-06, + "loss": 1.1102, + "step": 2453 + }, + { + "epoch": 0.51, + "learning_rate": 5.103164176443727e-06, + "loss": 1.0883, + "step": 2454 + }, + { + "epoch": 0.51, + "learning_rate": 5.102462803069404e-06, + "loss": 0.8716, + "step": 2455 + }, + { + "epoch": 0.51, + "learning_rate": 5.1017612037856036e-06, + "loss": 1.1199, + "step": 2456 + }, + { + "epoch": 0.51, + "learning_rate": 5.101059378667711e-06, + "loss": 0.932, + "step": 2457 + }, + { + "epoch": 0.51, + "learning_rate": 5.100357327791138e-06, + "loss": 1.1086, + "step": 2458 + }, + { + "epoch": 0.51, + "learning_rate": 5.099655051231321e-06, + "loss": 0.7941, + "step": 2459 + }, + { + "epoch": 0.51, + "learning_rate": 5.0989525490637176e-06, + "loss": 0.9141, + "step": 2460 + }, + { + "epoch": 0.51, + "learning_rate": 5.0982498213638125e-06, + "loss": 1.0351, + "step": 2461 + }, + { + "epoch": 0.51, + "learning_rate": 5.097546868207114e-06, + "loss": 0.9666, + "step": 2462 + }, + { + "epoch": 0.51, + "learning_rate": 5.096843689669155e-06, + "loss": 0.908, + "step": 2463 + }, + { + "epoch": 0.51, + "learning_rate": 5.096140285825491e-06, + "loss": 0.8696, + "step": 2464 + }, + { + "epoch": 0.51, + "learning_rate": 5.095436656751702e-06, + "loss": 0.9934, + "step": 2465 + }, + { + "epoch": 0.51, + "learning_rate": 5.094732802523394e-06, + "loss": 0.7219, + "step": 2466 + }, + { + "epoch": 0.51, + "learning_rate": 5.094028723216196e-06, + "loss": 0.7932, + "step": 2467 + }, + { + "epoch": 0.51, + "learning_rate": 5.093324418905761e-06, + "loss": 1.033, + "step": 2468 + }, + { + "epoch": 0.51, + "learning_rate": 5.092619889667767e-06, + "loss": 0.9572, + "step": 2469 + }, + { + "epoch": 0.51, + "learning_rate": 5.091915135577915e-06, + "loss": 0.8775, + "step": 2470 + }, + { + "epoch": 0.51, + "learning_rate": 5.091210156711931e-06, + "loss": 0.873, + "step": 2471 + }, + { + "epoch": 0.51, + "learning_rate": 5.090504953145565e-06, + "loss": 0.9255, + "step": 2472 + }, + { + "epoch": 0.51, + "learning_rate": 5.08979952495459e-06, + "loss": 0.8991, + "step": 2473 + }, + { + "epoch": 0.51, + "learning_rate": 5.089093872214807e-06, + "loss": 0.8108, + "step": 2474 + }, + { + "epoch": 0.51, + "learning_rate": 5.088387995002035e-06, + "loss": 1.1737, + "step": 2475 + }, + { + "epoch": 0.51, + "learning_rate": 5.087681893392122e-06, + "loss": 1.0651, + "step": 2476 + }, + { + "epoch": 0.52, + "learning_rate": 5.086975567460939e-06, + "loss": 0.9521, + "step": 2477 + }, + { + "epoch": 0.52, + "learning_rate": 5.086269017284381e-06, + "loss": 0.9331, + "step": 2478 + }, + { + "epoch": 0.52, + "learning_rate": 5.0855622429383635e-06, + "loss": 0.9467, + "step": 2479 + }, + { + "epoch": 0.52, + "learning_rate": 5.084855244498834e-06, + "loss": 0.9204, + "step": 2480 + }, + { + "epoch": 0.52, + "learning_rate": 5.084148022041757e-06, + "loss": 0.9802, + "step": 2481 + }, + { + "epoch": 0.52, + "learning_rate": 5.083440575643123e-06, + "loss": 0.9266, + "step": 2482 + }, + { + "epoch": 0.52, + "learning_rate": 5.082732905378948e-06, + "loss": 1.1413, + "step": 2483 + }, + { + "epoch": 0.52, + "learning_rate": 5.082025011325272e-06, + "loss": 1.0594, + "step": 2484 + }, + { + "epoch": 0.52, + "learning_rate": 5.081316893558156e-06, + "loss": 0.9664, + "step": 2485 + }, + { + "epoch": 0.52, + "learning_rate": 5.080608552153688e-06, + "loss": 0.8588, + "step": 2486 + }, + { + "epoch": 0.52, + "learning_rate": 5.07989998718798e-06, + "loss": 0.8075, + "step": 2487 + }, + { + "epoch": 0.52, + "learning_rate": 5.079191198737167e-06, + "loss": 0.9046, + "step": 2488 + }, + { + "epoch": 0.52, + "learning_rate": 5.078482186877408e-06, + "loss": 0.9701, + "step": 2489 + }, + { + "epoch": 0.52, + "learning_rate": 5.077772951684886e-06, + "loss": 0.9951, + "step": 2490 + }, + { + "epoch": 0.52, + "learning_rate": 5.077063493235809e-06, + "loss": 0.8266, + "step": 2491 + }, + { + "epoch": 0.52, + "learning_rate": 5.076353811606408e-06, + "loss": 0.9048, + "step": 2492 + }, + { + "epoch": 0.52, + "learning_rate": 5.075643906872938e-06, + "loss": 0.695, + "step": 2493 + }, + { + "epoch": 0.52, + "learning_rate": 5.074933779111679e-06, + "loss": 1.0137, + "step": 2494 + }, + { + "epoch": 0.52, + "learning_rate": 5.074223428398932e-06, + "loss": 0.9308, + "step": 2495 + }, + { + "epoch": 0.52, + "learning_rate": 5.073512854811026e-06, + "loss": 0.8736, + "step": 2496 + }, + { + "epoch": 0.52, + "learning_rate": 5.072802058424312e-06, + "loss": 0.9095, + "step": 2497 + }, + { + "epoch": 0.52, + "learning_rate": 5.072091039315165e-06, + "loss": 0.978, + "step": 2498 + }, + { + "epoch": 0.52, + "learning_rate": 5.0713797975599825e-06, + "loss": 0.8631, + "step": 2499 + }, + { + "epoch": 0.52, + "learning_rate": 5.07066833323519e-06, + "loss": 1.0013, + "step": 2500 + }, + { + "epoch": 0.52, + "learning_rate": 5.0699566464172326e-06, + "loss": 0.9105, + "step": 2501 + }, + { + "epoch": 0.52, + "learning_rate": 5.069244737182581e-06, + "loss": 1.1299, + "step": 2502 + }, + { + "epoch": 0.52, + "learning_rate": 5.06853260560773e-06, + "loss": 1.017, + "step": 2503 + }, + { + "epoch": 0.52, + "learning_rate": 5.067820251769198e-06, + "loss": 1.0599, + "step": 2504 + }, + { + "epoch": 0.52, + "learning_rate": 5.067107675743528e-06, + "loss": 0.9005, + "step": 2505 + }, + { + "epoch": 0.52, + "learning_rate": 5.066394877607285e-06, + "loss": 0.9718, + "step": 2506 + }, + { + "epoch": 0.52, + "learning_rate": 5.065681857437061e-06, + "loss": 0.9717, + "step": 2507 + }, + { + "epoch": 0.52, + "learning_rate": 5.064968615309468e-06, + "loss": 1.0909, + "step": 2508 + }, + { + "epoch": 0.52, + "learning_rate": 5.064255151301144e-06, + "loss": 1.0547, + "step": 2509 + }, + { + "epoch": 0.52, + "learning_rate": 5.063541465488752e-06, + "loss": 0.8874, + "step": 2510 + }, + { + "epoch": 0.52, + "learning_rate": 5.0628275579489775e-06, + "loss": 0.9591, + "step": 2511 + }, + { + "epoch": 0.52, + "learning_rate": 5.062113428758528e-06, + "loss": 0.9388, + "step": 2512 + }, + { + "epoch": 0.52, + "learning_rate": 5.061399077994138e-06, + "loss": 1.0513, + "step": 2513 + }, + { + "epoch": 0.52, + "learning_rate": 5.060684505732565e-06, + "loss": 0.9671, + "step": 2514 + }, + { + "epoch": 0.52, + "learning_rate": 5.059969712050588e-06, + "loss": 0.8681, + "step": 2515 + }, + { + "epoch": 0.52, + "learning_rate": 5.059254697025013e-06, + "loss": 0.8135, + "step": 2516 + }, + { + "epoch": 0.52, + "learning_rate": 5.058539460732668e-06, + "loss": 0.8027, + "step": 2517 + }, + { + "epoch": 0.52, + "learning_rate": 5.057824003250403e-06, + "loss": 1.1504, + "step": 2518 + }, + { + "epoch": 0.52, + "learning_rate": 5.057108324655098e-06, + "loss": 0.9563, + "step": 2519 + }, + { + "epoch": 0.52, + "learning_rate": 5.056392425023649e-06, + "loss": 0.8946, + "step": 2520 + }, + { + "epoch": 0.52, + "learning_rate": 5.055676304432981e-06, + "loss": 1.0663, + "step": 2521 + }, + { + "epoch": 0.52, + "learning_rate": 5.054959962960041e-06, + "loss": 0.7307, + "step": 2522 + }, + { + "epoch": 0.52, + "learning_rate": 5.054243400681799e-06, + "loss": 1.1529, + "step": 2523 + }, + { + "epoch": 0.52, + "learning_rate": 5.053526617675251e-06, + "loss": 0.9176, + "step": 2524 + }, + { + "epoch": 0.53, + "learning_rate": 5.052809614017413e-06, + "loss": 0.911, + "step": 2525 + }, + { + "epoch": 0.53, + "learning_rate": 5.05209238978533e-06, + "loss": 0.9896, + "step": 2526 + }, + { + "epoch": 0.53, + "learning_rate": 5.051374945056065e-06, + "loss": 1.1208, + "step": 2527 + }, + { + "epoch": 0.53, + "learning_rate": 5.050657279906709e-06, + "loss": 0.8298, + "step": 2528 + }, + { + "epoch": 0.53, + "learning_rate": 5.049939394414375e-06, + "loss": 0.8746, + "step": 2529 + }, + { + "epoch": 0.53, + "learning_rate": 5.0492212886562e-06, + "loss": 0.9392, + "step": 2530 + }, + { + "epoch": 0.53, + "learning_rate": 5.048502962709342e-06, + "loss": 0.8442, + "step": 2531 + }, + { + "epoch": 0.53, + "learning_rate": 5.047784416650988e-06, + "loss": 0.9574, + "step": 2532 + }, + { + "epoch": 0.53, + "learning_rate": 5.047065650558344e-06, + "loss": 1.3257, + "step": 2533 + }, + { + "epoch": 0.53, + "learning_rate": 5.046346664508642e-06, + "loss": 0.9249, + "step": 2534 + }, + { + "epoch": 0.53, + "learning_rate": 5.0456274585791375e-06, + "loss": 1.0227, + "step": 2535 + }, + { + "epoch": 0.53, + "learning_rate": 5.0449080328471084e-06, + "loss": 0.949, + "step": 2536 + }, + { + "epoch": 0.53, + "learning_rate": 5.0441883873898565e-06, + "loss": 0.9935, + "step": 2537 + }, + { + "epoch": 0.53, + "learning_rate": 5.043468522284709e-06, + "loss": 0.9295, + "step": 2538 + }, + { + "epoch": 0.53, + "learning_rate": 5.0427484376090144e-06, + "loss": 0.9812, + "step": 2539 + }, + { + "epoch": 0.53, + "learning_rate": 5.042028133440144e-06, + "loss": 0.8358, + "step": 2540 + }, + { + "epoch": 0.53, + "learning_rate": 5.041307609855499e-06, + "loss": 1.119, + "step": 2541 + }, + { + "epoch": 0.53, + "learning_rate": 5.040586866932496e-06, + "loss": 1.3557, + "step": 2542 + }, + { + "epoch": 0.53, + "learning_rate": 5.03986590474858e-06, + "loss": 0.9347, + "step": 2543 + }, + { + "epoch": 0.53, + "learning_rate": 5.0391447233812185e-06, + "loss": 0.9196, + "step": 2544 + }, + { + "epoch": 0.53, + "learning_rate": 5.038423322907901e-06, + "loss": 0.9909, + "step": 2545 + }, + { + "epoch": 0.53, + "learning_rate": 5.037701703406143e-06, + "loss": 1.0625, + "step": 2546 + }, + { + "epoch": 0.53, + "learning_rate": 5.036979864953483e-06, + "loss": 1.2342, + "step": 2547 + }, + { + "epoch": 0.53, + "learning_rate": 5.0362578076274815e-06, + "loss": 0.9071, + "step": 2548 + }, + { + "epoch": 0.53, + "learning_rate": 5.035535531505725e-06, + "loss": 0.8573, + "step": 2549 + }, + { + "epoch": 0.53, + "learning_rate": 5.034813036665819e-06, + "loss": 1.0338, + "step": 2550 + }, + { + "epoch": 0.53, + "learning_rate": 5.034090323185398e-06, + "loss": 1.0673, + "step": 2551 + }, + { + "epoch": 0.53, + "learning_rate": 5.033367391142118e-06, + "loss": 0.8365, + "step": 2552 + }, + { + "epoch": 0.53, + "learning_rate": 5.032644240613655e-06, + "loss": 0.9659, + "step": 2553 + }, + { + "epoch": 0.53, + "learning_rate": 5.031920871677715e-06, + "loss": 1.081, + "step": 2554 + }, + { + "epoch": 0.53, + "learning_rate": 5.0311972844120215e-06, + "loss": 1.0689, + "step": 2555 + }, + { + "epoch": 0.53, + "learning_rate": 5.0304734788943254e-06, + "loss": 0.8112, + "step": 2556 + }, + { + "epoch": 0.53, + "learning_rate": 5.029749455202398e-06, + "loss": 0.9363, + "step": 2557 + }, + { + "epoch": 0.53, + "learning_rate": 5.029025213414037e-06, + "loss": 1.176, + "step": 2558 + }, + { + "epoch": 0.53, + "learning_rate": 5.028300753607062e-06, + "loss": 0.9279, + "step": 2559 + }, + { + "epoch": 0.53, + "learning_rate": 5.0275760758593154e-06, + "loss": 0.8483, + "step": 2560 + }, + { + "epoch": 0.53, + "learning_rate": 5.026851180248665e-06, + "loss": 0.975, + "step": 2561 + }, + { + "epoch": 0.53, + "learning_rate": 5.026126066852998e-06, + "loss": 0.9939, + "step": 2562 + }, + { + "epoch": 0.53, + "learning_rate": 5.025400735750231e-06, + "loss": 0.8144, + "step": 2563 + }, + { + "epoch": 0.53, + "learning_rate": 5.024675187018299e-06, + "loss": 0.9126, + "step": 2564 + }, + { + "epoch": 0.53, + "learning_rate": 5.023949420735163e-06, + "loss": 1.0224, + "step": 2565 + }, + { + "epoch": 0.53, + "learning_rate": 5.023223436978806e-06, + "loss": 1.2689, + "step": 2566 + }, + { + "epoch": 0.53, + "learning_rate": 5.022497235827235e-06, + "loss": 0.9545, + "step": 2567 + }, + { + "epoch": 0.53, + "learning_rate": 5.021770817358481e-06, + "loss": 0.6864, + "step": 2568 + }, + { + "epoch": 0.53, + "learning_rate": 5.021044181650596e-06, + "loss": 0.89, + "step": 2569 + }, + { + "epoch": 0.53, + "learning_rate": 5.020317328781658e-06, + "loss": 1.082, + "step": 2570 + }, + { + "epoch": 0.53, + "learning_rate": 5.019590258829767e-06, + "loss": 0.8674, + "step": 2571 + }, + { + "epoch": 0.53, + "learning_rate": 5.018862971873049e-06, + "loss": 1.0246, + "step": 2572 + }, + { + "epoch": 0.54, + "learning_rate": 5.018135467989646e-06, + "loss": 1.028, + "step": 2573 + }, + { + "epoch": 0.54, + "learning_rate": 5.017407747257732e-06, + "loss": 1.0678, + "step": 2574 + }, + { + "epoch": 0.54, + "learning_rate": 5.0166798097555e-06, + "loss": 1.1637, + "step": 2575 + }, + { + "epoch": 0.54, + "learning_rate": 5.015951655561165e-06, + "loss": 1.0397, + "step": 2576 + }, + { + "epoch": 0.54, + "learning_rate": 5.015223284752969e-06, + "loss": 1.1779, + "step": 2577 + }, + { + "epoch": 0.54, + "learning_rate": 5.014494697409176e-06, + "loss": 1.186, + "step": 2578 + }, + { + "epoch": 0.54, + "learning_rate": 5.01376589360807e-06, + "loss": 1.1003, + "step": 2579 + }, + { + "epoch": 0.54, + "learning_rate": 5.013036873427963e-06, + "loss": 0.8704, + "step": 2580 + }, + { + "epoch": 0.54, + "learning_rate": 5.012307636947187e-06, + "loss": 1.0474, + "step": 2581 + }, + { + "epoch": 0.54, + "learning_rate": 5.0115781842441006e-06, + "loss": 1.297, + "step": 2582 + }, + { + "epoch": 0.54, + "learning_rate": 5.010848515397081e-06, + "loss": 0.8542, + "step": 2583 + }, + { + "epoch": 0.54, + "learning_rate": 5.010118630484531e-06, + "loss": 0.9307, + "step": 2584 + }, + { + "epoch": 0.54, + "learning_rate": 5.00938852958488e-06, + "loss": 0.8234, + "step": 2585 + }, + { + "epoch": 0.54, + "learning_rate": 5.008658212776572e-06, + "loss": 0.8584, + "step": 2586 + }, + { + "epoch": 0.54, + "learning_rate": 5.007927680138084e-06, + "loss": 0.857, + "step": 2587 + }, + { + "epoch": 0.54, + "learning_rate": 5.007196931747911e-06, + "loss": 0.8757, + "step": 2588 + }, + { + "epoch": 0.54, + "learning_rate": 5.006465967684569e-06, + "loss": 1.0301, + "step": 2589 + }, + { + "epoch": 0.54, + "learning_rate": 5.0057347880266025e-06, + "loss": 0.9718, + "step": 2590 + }, + { + "epoch": 0.54, + "learning_rate": 5.005003392852576e-06, + "loss": 0.7893, + "step": 2591 + }, + { + "epoch": 0.54, + "learning_rate": 5.004271782241078e-06, + "loss": 1.119, + "step": 2592 + }, + { + "epoch": 0.54, + "learning_rate": 5.00353995627072e-06, + "loss": 0.863, + "step": 2593 + }, + { + "epoch": 0.54, + "learning_rate": 5.002807915020137e-06, + "loss": 1.0219, + "step": 2594 + }, + { + "epoch": 0.54, + "learning_rate": 5.002075658567987e-06, + "loss": 0.9247, + "step": 2595 + }, + { + "epoch": 0.54, + "learning_rate": 5.00134318699295e-06, + "loss": 1.0103, + "step": 2596 + }, + { + "epoch": 0.54, + "learning_rate": 5.00061050037373e-06, + "loss": 0.8803, + "step": 2597 + }, + { + "epoch": 0.54, + "learning_rate": 4.999877598789055e-06, + "loss": 0.7494, + "step": 2598 + }, + { + "epoch": 0.54, + "learning_rate": 4.999144482317674e-06, + "loss": 0.9234, + "step": 2599 + }, + { + "epoch": 0.54, + "learning_rate": 4.998411151038362e-06, + "loss": 0.9062, + "step": 2600 + }, + { + "epoch": 0.54, + "learning_rate": 4.9976776050299125e-06, + "loss": 0.9348, + "step": 2601 + }, + { + "epoch": 0.54, + "learning_rate": 4.996943844371149e-06, + "loss": 0.9997, + "step": 2602 + }, + { + "epoch": 0.54, + "learning_rate": 4.996209869140911e-06, + "loss": 1.0604, + "step": 2603 + }, + { + "epoch": 0.54, + "learning_rate": 4.995475679418065e-06, + "loss": 1.0737, + "step": 2604 + }, + { + "epoch": 0.54, + "learning_rate": 4.994741275281499e-06, + "loss": 0.955, + "step": 2605 + }, + { + "epoch": 0.54, + "learning_rate": 4.994006656810127e-06, + "loss": 1.0295, + "step": 2606 + }, + { + "epoch": 0.54, + "learning_rate": 4.993271824082881e-06, + "loss": 1.1063, + "step": 2607 + }, + { + "epoch": 0.54, + "learning_rate": 4.99253677717872e-06, + "loss": 0.8617, + "step": 2608 + }, + { + "epoch": 0.54, + "learning_rate": 4.991801516176624e-06, + "loss": 1.202, + "step": 2609 + }, + { + "epoch": 0.54, + "learning_rate": 4.991066041155599e-06, + "loss": 0.971, + "step": 2610 + }, + { + "epoch": 0.54, + "learning_rate": 4.990330352194667e-06, + "loss": 1.0037, + "step": 2611 + }, + { + "epoch": 0.54, + "learning_rate": 4.989594449372883e-06, + "loss": 0.9203, + "step": 2612 + }, + { + "epoch": 0.54, + "learning_rate": 4.988858332769316e-06, + "loss": 0.9964, + "step": 2613 + }, + { + "epoch": 0.54, + "learning_rate": 4.988122002463064e-06, + "loss": 0.9321, + "step": 2614 + }, + { + "epoch": 0.54, + "learning_rate": 4.987385458533244e-06, + "loss": 0.9853, + "step": 2615 + }, + { + "epoch": 0.54, + "learning_rate": 4.986648701058999e-06, + "loss": 0.9984, + "step": 2616 + }, + { + "epoch": 0.54, + "learning_rate": 4.985911730119492e-06, + "loss": 0.9028, + "step": 2617 + }, + { + "epoch": 0.54, + "learning_rate": 4.985174545793912e-06, + "loss": 0.8199, + "step": 2618 + }, + { + "epoch": 0.54, + "learning_rate": 4.984437148161468e-06, + "loss": 0.9792, + "step": 2619 + }, + { + "epoch": 0.54, + "learning_rate": 4.983699537301394e-06, + "loss": 0.8038, + "step": 2620 + }, + { + "epoch": 0.55, + "learning_rate": 4.982961713292947e-06, + "loss": 1.0658, + "step": 2621 + }, + { + "epoch": 0.55, + "learning_rate": 4.982223676215406e-06, + "loss": 1.0377, + "step": 2622 + }, + { + "epoch": 0.55, + "learning_rate": 4.9814854261480715e-06, + "loss": 0.8457, + "step": 2623 + }, + { + "epoch": 0.55, + "learning_rate": 4.9807469631702695e-06, + "loss": 1.1374, + "step": 2624 + }, + { + "epoch": 0.55, + "learning_rate": 4.980008287361347e-06, + "loss": 1.0478, + "step": 2625 + }, + { + "epoch": 0.55, + "learning_rate": 4.979269398800676e-06, + "loss": 1.1262, + "step": 2626 + }, + { + "epoch": 0.55, + "learning_rate": 4.978530297567649e-06, + "loss": 1.0051, + "step": 2627 + }, + { + "epoch": 0.55, + "learning_rate": 4.977790983741683e-06, + "loss": 0.9621, + "step": 2628 + }, + { + "epoch": 0.55, + "learning_rate": 4.9770514574022176e-06, + "loss": 0.933, + "step": 2629 + }, + { + "epoch": 0.55, + "learning_rate": 4.976311718628713e-06, + "loss": 0.8661, + "step": 2630 + }, + { + "epoch": 0.55, + "learning_rate": 4.975571767500656e-06, + "loss": 1.1698, + "step": 2631 + }, + { + "epoch": 0.55, + "learning_rate": 4.974831604097554e-06, + "loss": 1.035, + "step": 2632 + }, + { + "epoch": 0.55, + "learning_rate": 4.974091228498936e-06, + "loss": 0.9496, + "step": 2633 + }, + { + "epoch": 0.55, + "learning_rate": 4.973350640784357e-06, + "loss": 0.898, + "step": 2634 + }, + { + "epoch": 0.55, + "learning_rate": 4.972609841033393e-06, + "loss": 0.9609, + "step": 2635 + }, + { + "epoch": 0.55, + "learning_rate": 4.97186882932564e-06, + "loss": 1.0409, + "step": 2636 + }, + { + "epoch": 0.55, + "learning_rate": 4.971127605740723e-06, + "loss": 0.8878, + "step": 2637 + }, + { + "epoch": 0.55, + "learning_rate": 4.970386170358287e-06, + "loss": 0.9911, + "step": 2638 + }, + { + "epoch": 0.55, + "learning_rate": 4.9696445232579966e-06, + "loss": 0.8893, + "step": 2639 + }, + { + "epoch": 0.55, + "learning_rate": 4.968902664519542e-06, + "loss": 1.0484, + "step": 2640 + }, + { + "epoch": 0.55, + "learning_rate": 4.968160594222638e-06, + "loss": 0.9679, + "step": 2641 + }, + { + "epoch": 0.55, + "learning_rate": 4.967418312447018e-06, + "loss": 0.7957, + "step": 2642 + }, + { + "epoch": 0.55, + "learning_rate": 4.96667581927244e-06, + "loss": 0.8141, + "step": 2643 + }, + { + "epoch": 0.55, + "learning_rate": 4.9659331147786865e-06, + "loss": 1.026, + "step": 2644 + }, + { + "epoch": 0.55, + "learning_rate": 4.965190199045559e-06, + "loss": 0.8327, + "step": 2645 + }, + { + "epoch": 0.55, + "learning_rate": 4.964447072152887e-06, + "loss": 1.0568, + "step": 2646 + }, + { + "epoch": 0.55, + "learning_rate": 4.963703734180516e-06, + "loss": 0.8787, + "step": 2647 + }, + { + "epoch": 0.55, + "learning_rate": 4.962960185208318e-06, + "loss": 1.0242, + "step": 2648 + }, + { + "epoch": 0.55, + "learning_rate": 4.962216425316189e-06, + "loss": 1.0282, + "step": 2649 + }, + { + "epoch": 0.55, + "learning_rate": 4.961472454584045e-06, + "loss": 1.1661, + "step": 2650 + }, + { + "epoch": 0.55, + "learning_rate": 4.960728273091826e-06, + "loss": 0.8773, + "step": 2651 + }, + { + "epoch": 0.55, + "learning_rate": 4.959983880919494e-06, + "loss": 1.0649, + "step": 2652 + }, + { + "epoch": 0.55, + "learning_rate": 4.959239278147033e-06, + "loss": 0.9757, + "step": 2653 + }, + { + "epoch": 0.55, + "learning_rate": 4.958494464854452e-06, + "loss": 0.7974, + "step": 2654 + }, + { + "epoch": 0.55, + "learning_rate": 4.9577494411217805e-06, + "loss": 1.0374, + "step": 2655 + }, + { + "epoch": 0.55, + "learning_rate": 4.957004207029072e-06, + "loss": 0.9779, + "step": 2656 + }, + { + "epoch": 0.55, + "learning_rate": 4.9562587626564e-06, + "loss": 0.8656, + "step": 2657 + }, + { + "epoch": 0.55, + "learning_rate": 4.955513108083864e-06, + "loss": 0.9525, + "step": 2658 + }, + { + "epoch": 0.55, + "learning_rate": 4.9547672433915844e-06, + "loss": 1.0833, + "step": 2659 + }, + { + "epoch": 0.55, + "learning_rate": 4.954021168659703e-06, + "loss": 0.8195, + "step": 2660 + }, + { + "epoch": 0.55, + "learning_rate": 4.953274883968387e-06, + "loss": 1.0041, + "step": 2661 + }, + { + "epoch": 0.55, + "learning_rate": 4.952528389397824e-06, + "loss": 0.9358, + "step": 2662 + }, + { + "epoch": 0.55, + "learning_rate": 4.951781685028226e-06, + "loss": 1.1057, + "step": 2663 + }, + { + "epoch": 0.55, + "learning_rate": 4.951034770939823e-06, + "loss": 1.0556, + "step": 2664 + }, + { + "epoch": 0.55, + "learning_rate": 4.950287647212875e-06, + "loss": 0.9416, + "step": 2665 + }, + { + "epoch": 0.55, + "learning_rate": 4.9495403139276584e-06, + "loss": 1.0965, + "step": 2666 + }, + { + "epoch": 0.55, + "learning_rate": 4.948792771164473e-06, + "loss": 0.9436, + "step": 2667 + }, + { + "epoch": 0.55, + "learning_rate": 4.948045019003644e-06, + "loss": 0.841, + "step": 2668 + }, + { + "epoch": 0.56, + "learning_rate": 4.947297057525517e-06, + "loss": 0.7557, + "step": 2669 + }, + { + "epoch": 0.56, + "learning_rate": 4.946548886810459e-06, + "loss": 1.1596, + "step": 2670 + }, + { + "epoch": 0.56, + "learning_rate": 4.945800506938863e-06, + "loss": 1.1219, + "step": 2671 + }, + { + "epoch": 0.56, + "learning_rate": 4.9450519179911415e-06, + "loss": 0.8848, + "step": 2672 + }, + { + "epoch": 0.56, + "learning_rate": 4.94430312004773e-06, + "loss": 0.7982, + "step": 2673 + }, + { + "epoch": 0.56, + "learning_rate": 4.943554113189087e-06, + "loss": 0.8329, + "step": 2674 + }, + { + "epoch": 0.56, + "learning_rate": 4.942804897495693e-06, + "loss": 1.0377, + "step": 2675 + }, + { + "epoch": 0.56, + "learning_rate": 4.9420554730480516e-06, + "loss": 1.0421, + "step": 2676 + }, + { + "epoch": 0.56, + "learning_rate": 4.941305839926688e-06, + "loss": 0.8797, + "step": 2677 + }, + { + "epoch": 0.56, + "learning_rate": 4.940555998212151e-06, + "loss": 0.8476, + "step": 2678 + }, + { + "epoch": 0.56, + "learning_rate": 4.939805947985011e-06, + "loss": 0.861, + "step": 2679 + }, + { + "epoch": 0.56, + "learning_rate": 4.939055689325858e-06, + "loss": 0.8023, + "step": 2680 + }, + { + "epoch": 0.56, + "learning_rate": 4.938305222315312e-06, + "loss": 0.9713, + "step": 2681 + }, + { + "epoch": 0.56, + "learning_rate": 4.937554547034006e-06, + "loss": 0.8407, + "step": 2682 + }, + { + "epoch": 0.56, + "learning_rate": 4.936803663562604e-06, + "loss": 1.0309, + "step": 2683 + }, + { + "epoch": 0.56, + "learning_rate": 4.936052571981785e-06, + "loss": 0.9783, + "step": 2684 + }, + { + "epoch": 0.56, + "learning_rate": 4.935301272372256e-06, + "loss": 0.967, + "step": 2685 + }, + { + "epoch": 0.56, + "learning_rate": 4.934549764814744e-06, + "loss": 1.0677, + "step": 2686 + }, + { + "epoch": 0.56, + "learning_rate": 4.933798049389998e-06, + "loss": 0.9838, + "step": 2687 + }, + { + "epoch": 0.56, + "learning_rate": 4.933046126178788e-06, + "loss": 0.8914, + "step": 2688 + }, + { + "epoch": 0.56, + "learning_rate": 4.932293995261911e-06, + "loss": 0.8832, + "step": 2689 + }, + { + "epoch": 0.56, + "learning_rate": 4.931541656720181e-06, + "loss": 1.1334, + "step": 2690 + }, + { + "epoch": 0.56, + "learning_rate": 4.9307891106344395e-06, + "loss": 1.0899, + "step": 2691 + }, + { + "epoch": 0.56, + "learning_rate": 4.930036357085544e-06, + "loss": 1.0349, + "step": 2692 + }, + { + "epoch": 0.56, + "learning_rate": 4.929283396154381e-06, + "loss": 0.7373, + "step": 2693 + }, + { + "epoch": 0.56, + "learning_rate": 4.928530227921854e-06, + "loss": 0.9768, + "step": 2694 + }, + { + "epoch": 0.56, + "learning_rate": 4.927776852468892e-06, + "loss": 1.0718, + "step": 2695 + }, + { + "epoch": 0.56, + "learning_rate": 4.9270232698764445e-06, + "loss": 0.7544, + "step": 2696 + }, + { + "epoch": 0.56, + "learning_rate": 4.926269480225484e-06, + "loss": 1.0599, + "step": 2697 + }, + { + "epoch": 0.56, + "learning_rate": 4.925515483597007e-06, + "loss": 0.7094, + "step": 2698 + }, + { + "epoch": 0.56, + "learning_rate": 4.924761280072026e-06, + "loss": 0.9542, + "step": 2699 + }, + { + "epoch": 0.56, + "learning_rate": 4.924006869731584e-06, + "loss": 0.8975, + "step": 2700 + }, + { + "epoch": 0.56, + "learning_rate": 4.923252252656742e-06, + "loss": 1.0897, + "step": 2701 + }, + { + "epoch": 0.56, + "learning_rate": 4.922497428928581e-06, + "loss": 1.0104, + "step": 2702 + }, + { + "epoch": 0.56, + "learning_rate": 4.921742398628209e-06, + "loss": 0.9648, + "step": 2703 + }, + { + "epoch": 0.56, + "learning_rate": 4.9209871618367545e-06, + "loss": 0.9775, + "step": 2704 + }, + { + "epoch": 0.56, + "learning_rate": 4.920231718635365e-06, + "loss": 0.9733, + "step": 2705 + }, + { + "epoch": 0.56, + "learning_rate": 4.919476069105217e-06, + "loss": 0.8774, + "step": 2706 + }, + { + "epoch": 0.56, + "learning_rate": 4.9187202133275e-06, + "loss": 1.1875, + "step": 2707 + }, + { + "epoch": 0.56, + "learning_rate": 4.9179641513834334e-06, + "loss": 0.8037, + "step": 2708 + }, + { + "epoch": 0.56, + "learning_rate": 4.9172078833542555e-06, + "loss": 0.8601, + "step": 2709 + }, + { + "epoch": 0.56, + "learning_rate": 4.916451409321228e-06, + "loss": 0.9621, + "step": 2710 + }, + { + "epoch": 0.56, + "learning_rate": 4.915694729365633e-06, + "loss": 0.7833, + "step": 2711 + }, + { + "epoch": 0.56, + "learning_rate": 4.914937843568775e-06, + "loss": 0.7765, + "step": 2712 + }, + { + "epoch": 0.56, + "learning_rate": 4.914180752011983e-06, + "loss": 0.9161, + "step": 2713 + }, + { + "epoch": 0.56, + "learning_rate": 4.913423454776607e-06, + "loss": 0.8773, + "step": 2714 + }, + { + "epoch": 0.56, + "learning_rate": 4.912665951944016e-06, + "loss": 0.8775, + "step": 2715 + }, + { + "epoch": 0.56, + "learning_rate": 4.9119082435956055e-06, + "loss": 1.0859, + "step": 2716 + }, + { + "epoch": 0.57, + "learning_rate": 4.91115032981279e-06, + "loss": 1.0935, + "step": 2717 + }, + { + "epoch": 0.57, + "learning_rate": 4.910392210677009e-06, + "loss": 0.8797, + "step": 2718 + }, + { + "epoch": 0.57, + "learning_rate": 4.9096338862697206e-06, + "loss": 0.8954, + "step": 2719 + }, + { + "epoch": 0.57, + "learning_rate": 4.9088753566724075e-06, + "loss": 1.0249, + "step": 2720 + }, + { + "epoch": 0.57, + "learning_rate": 4.9081166219665736e-06, + "loss": 1.0245, + "step": 2721 + }, + { + "epoch": 0.57, + "learning_rate": 4.907357682233745e-06, + "loss": 0.8705, + "step": 2722 + }, + { + "epoch": 0.57, + "learning_rate": 4.906598537555469e-06, + "loss": 0.9589, + "step": 2723 + }, + { + "epoch": 0.57, + "learning_rate": 4.905839188013316e-06, + "loss": 0.9523, + "step": 2724 + }, + { + "epoch": 0.57, + "learning_rate": 4.905079633688879e-06, + "loss": 0.9988, + "step": 2725 + }, + { + "epoch": 0.57, + "learning_rate": 4.904319874663771e-06, + "loss": 0.7488, + "step": 2726 + }, + { + "epoch": 0.57, + "learning_rate": 4.903559911019628e-06, + "loss": 0.9469, + "step": 2727 + }, + { + "epoch": 0.57, + "learning_rate": 4.9027997428381095e-06, + "loss": 0.7759, + "step": 2728 + }, + { + "epoch": 0.57, + "learning_rate": 4.9020393702008935e-06, + "loss": 0.9342, + "step": 2729 + }, + { + "epoch": 0.57, + "learning_rate": 4.901278793189683e-06, + "loss": 1.0867, + "step": 2730 + }, + { + "epoch": 0.57, + "learning_rate": 4.900518011886202e-06, + "loss": 1.0903, + "step": 2731 + }, + { + "epoch": 0.57, + "learning_rate": 4.899757026372197e-06, + "loss": 0.8959, + "step": 2732 + }, + { + "epoch": 0.57, + "learning_rate": 4.898995836729434e-06, + "loss": 0.8601, + "step": 2733 + }, + { + "epoch": 0.57, + "learning_rate": 4.898234443039704e-06, + "loss": 1.0701, + "step": 2734 + }, + { + "epoch": 0.57, + "learning_rate": 4.897472845384819e-06, + "loss": 0.9708, + "step": 2735 + }, + { + "epoch": 0.57, + "learning_rate": 4.8967110438466115e-06, + "loss": 0.9711, + "step": 2736 + }, + { + "epoch": 0.57, + "learning_rate": 4.895949038506938e-06, + "loss": 0.9794, + "step": 2737 + }, + { + "epoch": 0.57, + "learning_rate": 4.895186829447676e-06, + "loss": 0.8646, + "step": 2738 + }, + { + "epoch": 0.57, + "learning_rate": 4.894424416750723e-06, + "loss": 0.9558, + "step": 2739 + }, + { + "epoch": 0.57, + "learning_rate": 4.8936618004980035e-06, + "loss": 0.9935, + "step": 2740 + }, + { + "epoch": 0.57, + "learning_rate": 4.892898980771458e-06, + "loss": 1.0338, + "step": 2741 + }, + { + "epoch": 0.57, + "learning_rate": 4.892135957653051e-06, + "loss": 1.0308, + "step": 2742 + }, + { + "epoch": 0.57, + "learning_rate": 4.891372731224772e-06, + "loss": 0.8039, + "step": 2743 + }, + { + "epoch": 0.57, + "learning_rate": 4.890609301568627e-06, + "loss": 0.9429, + "step": 2744 + }, + { + "epoch": 0.57, + "learning_rate": 4.889845668766648e-06, + "loss": 0.9663, + "step": 2745 + }, + { + "epoch": 0.57, + "learning_rate": 4.889081832900886e-06, + "loss": 0.9829, + "step": 2746 + }, + { + "epoch": 0.57, + "learning_rate": 4.888317794053417e-06, + "loss": 0.8141, + "step": 2747 + }, + { + "epoch": 0.57, + "learning_rate": 4.887553552306336e-06, + "loss": 0.7318, + "step": 2748 + }, + { + "epoch": 0.57, + "learning_rate": 4.8867891077417585e-06, + "loss": 0.7234, + "step": 2749 + }, + { + "epoch": 0.57, + "learning_rate": 4.886024460441827e-06, + "loss": 1.0528, + "step": 2750 + }, + { + "epoch": 0.57, + "learning_rate": 4.885259610488702e-06, + "loss": 0.8824, + "step": 2751 + }, + { + "epoch": 0.57, + "learning_rate": 4.8844945579645664e-06, + "loss": 0.9795, + "step": 2752 + }, + { + "epoch": 0.57, + "learning_rate": 4.883729302951625e-06, + "loss": 0.9262, + "step": 2753 + }, + { + "epoch": 0.57, + "learning_rate": 4.882963845532104e-06, + "loss": 0.9753, + "step": 2754 + }, + { + "epoch": 0.57, + "learning_rate": 4.882198185788253e-06, + "loss": 1.0247, + "step": 2755 + }, + { + "epoch": 0.57, + "learning_rate": 4.881432323802341e-06, + "loss": 0.8908, + "step": 2756 + }, + { + "epoch": 0.57, + "learning_rate": 4.88066625965666e-06, + "loss": 0.9432, + "step": 2757 + }, + { + "epoch": 0.57, + "learning_rate": 4.879899993433523e-06, + "loss": 0.9784, + "step": 2758 + }, + { + "epoch": 0.57, + "learning_rate": 4.879133525215266e-06, + "loss": 1.0825, + "step": 2759 + }, + { + "epoch": 0.57, + "learning_rate": 4.878366855084246e-06, + "loss": 0.7847, + "step": 2760 + }, + { + "epoch": 0.57, + "learning_rate": 4.877599983122841e-06, + "loss": 0.9108, + "step": 2761 + }, + { + "epoch": 0.57, + "learning_rate": 4.876832909413453e-06, + "loss": 0.9932, + "step": 2762 + }, + { + "epoch": 0.57, + "learning_rate": 4.876065634038502e-06, + "loss": 0.9881, + "step": 2763 + }, + { + "epoch": 0.57, + "learning_rate": 4.875298157080432e-06, + "loss": 0.8183, + "step": 2764 + }, + { + "epoch": 0.58, + "learning_rate": 4.87453047862171e-06, + "loss": 0.8591, + "step": 2765 + }, + { + "epoch": 0.58, + "learning_rate": 4.873762598744821e-06, + "loss": 0.8704, + "step": 2766 + }, + { + "epoch": 0.58, + "learning_rate": 4.872994517532276e-06, + "loss": 1.0732, + "step": 2767 + }, + { + "epoch": 0.58, + "learning_rate": 4.8722262350666015e-06, + "loss": 0.9458, + "step": 2768 + }, + { + "epoch": 0.58, + "learning_rate": 4.871457751430353e-06, + "loss": 1.1114, + "step": 2769 + }, + { + "epoch": 0.58, + "learning_rate": 4.8706890667061025e-06, + "loss": 0.971, + "step": 2770 + }, + { + "epoch": 0.58, + "learning_rate": 4.869920180976445e-06, + "loss": 1.1274, + "step": 2771 + }, + { + "epoch": 0.58, + "learning_rate": 4.8691510943239975e-06, + "loss": 0.7605, + "step": 2772 + }, + { + "epoch": 0.58, + "learning_rate": 4.8683818068313985e-06, + "loss": 0.9043, + "step": 2773 + }, + { + "epoch": 0.58, + "learning_rate": 4.8676123185813075e-06, + "loss": 1.077, + "step": 2774 + }, + { + "epoch": 0.58, + "learning_rate": 4.866842629656406e-06, + "loss": 1.096, + "step": 2775 + }, + { + "epoch": 0.58, + "learning_rate": 4.866072740139398e-06, + "loss": 1.0177, + "step": 2776 + }, + { + "epoch": 0.58, + "learning_rate": 4.865302650113006e-06, + "loss": 0.9357, + "step": 2777 + }, + { + "epoch": 0.58, + "learning_rate": 4.864532359659978e-06, + "loss": 0.903, + "step": 2778 + }, + { + "epoch": 0.58, + "learning_rate": 4.863761868863082e-06, + "loss": 0.8957, + "step": 2779 + }, + { + "epoch": 0.58, + "learning_rate": 4.862991177805104e-06, + "loss": 1.1514, + "step": 2780 + }, + { + "epoch": 0.58, + "learning_rate": 4.862220286568858e-06, + "loss": 0.8428, + "step": 2781 + }, + { + "epoch": 0.58, + "learning_rate": 4.8614491952371756e-06, + "loss": 1.1242, + "step": 2782 + }, + { + "epoch": 0.58, + "learning_rate": 4.86067790389291e-06, + "loss": 0.8054, + "step": 2783 + }, + { + "epoch": 0.58, + "learning_rate": 4.859906412618935e-06, + "loss": 1.2093, + "step": 2784 + }, + { + "epoch": 0.58, + "learning_rate": 4.85913472149815e-06, + "loss": 0.9945, + "step": 2785 + }, + { + "epoch": 0.58, + "learning_rate": 4.858362830613472e-06, + "loss": 0.7778, + "step": 2786 + }, + { + "epoch": 0.58, + "learning_rate": 4.857590740047839e-06, + "loss": 1.0078, + "step": 2787 + }, + { + "epoch": 0.58, + "learning_rate": 4.8568184498842144e-06, + "loss": 1.1242, + "step": 2788 + }, + { + "epoch": 0.58, + "learning_rate": 4.8560459602055804e-06, + "loss": 0.9869, + "step": 2789 + }, + { + "epoch": 0.58, + "learning_rate": 4.855273271094941e-06, + "loss": 0.9784, + "step": 2790 + }, + { + "epoch": 0.58, + "learning_rate": 4.8545003826353185e-06, + "loss": 1.0542, + "step": 2791 + }, + { + "epoch": 0.58, + "learning_rate": 4.853727294909763e-06, + "loss": 1.139, + "step": 2792 + }, + { + "epoch": 0.58, + "learning_rate": 4.852954008001343e-06, + "loss": 0.9511, + "step": 2793 + }, + { + "epoch": 0.58, + "learning_rate": 4.852180521993147e-06, + "loss": 0.6632, + "step": 2794 + }, + { + "epoch": 0.58, + "learning_rate": 4.851406836968286e-06, + "loss": 0.9383, + "step": 2795 + }, + { + "epoch": 0.58, + "learning_rate": 4.850632953009893e-06, + "loss": 0.9786, + "step": 2796 + }, + { + "epoch": 0.58, + "learning_rate": 4.8498588702011195e-06, + "loss": 1.1394, + "step": 2797 + }, + { + "epoch": 0.58, + "learning_rate": 4.849084588625144e-06, + "loss": 0.9034, + "step": 2798 + }, + { + "epoch": 0.58, + "learning_rate": 4.848310108365161e-06, + "loss": 1.0086, + "step": 2799 + }, + { + "epoch": 0.58, + "learning_rate": 4.847535429504387e-06, + "loss": 0.9096, + "step": 2800 + }, + { + "epoch": 0.58, + "learning_rate": 4.8467605521260656e-06, + "loss": 0.8642, + "step": 2801 + }, + { + "epoch": 0.58, + "learning_rate": 4.845985476313453e-06, + "loss": 1.0171, + "step": 2802 + }, + { + "epoch": 0.58, + "learning_rate": 4.8452102021498335e-06, + "loss": 0.9324, + "step": 2803 + }, + { + "epoch": 0.58, + "learning_rate": 4.844434729718509e-06, + "loss": 0.9764, + "step": 2804 + }, + { + "epoch": 0.58, + "learning_rate": 4.843659059102805e-06, + "loss": 0.9629, + "step": 2805 + }, + { + "epoch": 0.58, + "learning_rate": 4.842883190386066e-06, + "loss": 0.9794, + "step": 2806 + }, + { + "epoch": 0.58, + "learning_rate": 4.84210712365166e-06, + "loss": 1.0965, + "step": 2807 + }, + { + "epoch": 0.58, + "learning_rate": 4.841330858982975e-06, + "loss": 1.0717, + "step": 2808 + }, + { + "epoch": 0.58, + "learning_rate": 4.840554396463421e-06, + "loss": 1.1481, + "step": 2809 + }, + { + "epoch": 0.58, + "learning_rate": 4.839777736176427e-06, + "loss": 1.0776, + "step": 2810 + }, + { + "epoch": 0.58, + "learning_rate": 4.839000878205448e-06, + "loss": 0.7658, + "step": 2811 + }, + { + "epoch": 0.58, + "learning_rate": 4.838223822633955e-06, + "loss": 1.0859, + "step": 2812 + }, + { + "epoch": 0.59, + "learning_rate": 4.837446569545444e-06, + "loss": 0.9488, + "step": 2813 + }, + { + "epoch": 0.59, + "learning_rate": 4.836669119023431e-06, + "loss": 0.8919, + "step": 2814 + }, + { + "epoch": 0.59, + "learning_rate": 4.83589147115145e-06, + "loss": 0.9451, + "step": 2815 + }, + { + "epoch": 0.59, + "learning_rate": 4.835113626013063e-06, + "loss": 1.0162, + "step": 2816 + }, + { + "epoch": 0.59, + "learning_rate": 4.834335583691848e-06, + "loss": 1.0735, + "step": 2817 + }, + { + "epoch": 0.59, + "learning_rate": 4.833557344271405e-06, + "loss": 1.0893, + "step": 2818 + }, + { + "epoch": 0.59, + "learning_rate": 4.832778907835355e-06, + "loss": 0.8911, + "step": 2819 + }, + { + "epoch": 0.59, + "learning_rate": 4.832000274467344e-06, + "loss": 0.748, + "step": 2820 + }, + { + "epoch": 0.59, + "learning_rate": 4.831221444251033e-06, + "loss": 1.0105, + "step": 2821 + }, + { + "epoch": 0.59, + "learning_rate": 4.830442417270109e-06, + "loss": 1.0361, + "step": 2822 + }, + { + "epoch": 0.59, + "learning_rate": 4.829663193608278e-06, + "loss": 1.0661, + "step": 2823 + }, + { + "epoch": 0.59, + "learning_rate": 4.828883773349268e-06, + "loss": 1.1904, + "step": 2824 + }, + { + "epoch": 0.59, + "learning_rate": 4.828104156576826e-06, + "loss": 0.9388, + "step": 2825 + }, + { + "epoch": 0.59, + "learning_rate": 4.827324343374723e-06, + "loss": 1.235, + "step": 2826 + }, + { + "epoch": 0.59, + "learning_rate": 4.826544333826751e-06, + "loss": 0.8404, + "step": 2827 + }, + { + "epoch": 0.59, + "learning_rate": 4.825764128016719e-06, + "loss": 0.8038, + "step": 2828 + }, + { + "epoch": 0.59, + "learning_rate": 4.824983726028462e-06, + "loss": 1.1004, + "step": 2829 + }, + { + "epoch": 0.59, + "learning_rate": 4.824203127945834e-06, + "loss": 1.1069, + "step": 2830 + }, + { + "epoch": 0.59, + "learning_rate": 4.8234223338527105e-06, + "loss": 0.8658, + "step": 2831 + }, + { + "epoch": 0.59, + "learning_rate": 4.822641343832987e-06, + "loss": 0.7181, + "step": 2832 + }, + { + "epoch": 0.59, + "learning_rate": 4.821860157970581e-06, + "loss": 1.1232, + "step": 2833 + }, + { + "epoch": 0.59, + "learning_rate": 4.821078776349431e-06, + "loss": 0.7647, + "step": 2834 + }, + { + "epoch": 0.59, + "learning_rate": 4.820297199053497e-06, + "loss": 0.9346, + "step": 2835 + }, + { + "epoch": 0.59, + "learning_rate": 4.819515426166759e-06, + "loss": 0.8797, + "step": 2836 + }, + { + "epoch": 0.59, + "learning_rate": 4.818733457773217e-06, + "loss": 0.8636, + "step": 2837 + }, + { + "epoch": 0.59, + "learning_rate": 4.817951293956895e-06, + "loss": 1.0507, + "step": 2838 + }, + { + "epoch": 0.59, + "learning_rate": 4.817168934801838e-06, + "loss": 0.8214, + "step": 2839 + }, + { + "epoch": 0.59, + "learning_rate": 4.816386380392107e-06, + "loss": 0.7938, + "step": 2840 + }, + { + "epoch": 0.59, + "learning_rate": 4.8156036308117895e-06, + "loss": 0.7879, + "step": 2841 + }, + { + "epoch": 0.59, + "learning_rate": 4.814820686144992e-06, + "loss": 0.9677, + "step": 2842 + }, + { + "epoch": 0.59, + "learning_rate": 4.81403754647584e-06, + "loss": 0.8837, + "step": 2843 + }, + { + "epoch": 0.59, + "learning_rate": 4.8132542118884835e-06, + "loss": 0.7743, + "step": 2844 + }, + { + "epoch": 0.59, + "learning_rate": 4.8124706824670915e-06, + "loss": 1.0627, + "step": 2845 + }, + { + "epoch": 0.59, + "learning_rate": 4.8116869582958546e-06, + "loss": 0.8837, + "step": 2846 + }, + { + "epoch": 0.59, + "learning_rate": 4.810903039458982e-06, + "loss": 0.9299, + "step": 2847 + }, + { + "epoch": 0.59, + "learning_rate": 4.8101189260407086e-06, + "loss": 0.8153, + "step": 2848 + }, + { + "epoch": 0.59, + "learning_rate": 4.809334618125285e-06, + "loss": 0.7995, + "step": 2849 + }, + { + "epoch": 0.59, + "learning_rate": 4.808550115796986e-06, + "loss": 1.1485, + "step": 2850 + }, + { + "epoch": 0.59, + "learning_rate": 4.807765419140105e-06, + "loss": 0.869, + "step": 2851 + }, + { + "epoch": 0.59, + "learning_rate": 4.80698052823896e-06, + "loss": 0.8123, + "step": 2852 + }, + { + "epoch": 0.59, + "learning_rate": 4.806195443177886e-06, + "loss": 0.9202, + "step": 2853 + }, + { + "epoch": 0.59, + "learning_rate": 4.80541016404124e-06, + "loss": 1.0557, + "step": 2854 + }, + { + "epoch": 0.59, + "learning_rate": 4.804624690913401e-06, + "loss": 0.9507, + "step": 2855 + }, + { + "epoch": 0.59, + "learning_rate": 4.803839023878767e-06, + "loss": 1.1463, + "step": 2856 + }, + { + "epoch": 0.59, + "learning_rate": 4.80305316302176e-06, + "loss": 1.1706, + "step": 2857 + }, + { + "epoch": 0.59, + "learning_rate": 4.8022671084268185e-06, + "loss": 0.8909, + "step": 2858 + }, + { + "epoch": 0.59, + "learning_rate": 4.801480860178406e-06, + "loss": 0.9442, + "step": 2859 + }, + { + "epoch": 0.59, + "learning_rate": 4.800694418361002e-06, + "loss": 0.8815, + "step": 2860 + }, + { + "epoch": 0.6, + "learning_rate": 4.799907783059111e-06, + "loss": 0.9049, + "step": 2861 + }, + { + "epoch": 0.6, + "learning_rate": 4.799120954357258e-06, + "loss": 1.0631, + "step": 2862 + }, + { + "epoch": 0.6, + "learning_rate": 4.798333932339987e-06, + "loss": 0.8125, + "step": 2863 + }, + { + "epoch": 0.6, + "learning_rate": 4.797546717091863e-06, + "loss": 0.8176, + "step": 2864 + }, + { + "epoch": 0.6, + "learning_rate": 4.796759308697473e-06, + "loss": 0.9898, + "step": 2865 + }, + { + "epoch": 0.6, + "learning_rate": 4.795971707241423e-06, + "loss": 0.9235, + "step": 2866 + }, + { + "epoch": 0.6, + "learning_rate": 4.795183912808341e-06, + "loss": 1.1545, + "step": 2867 + }, + { + "epoch": 0.6, + "learning_rate": 4.794395925482876e-06, + "loss": 0.8292, + "step": 2868 + }, + { + "epoch": 0.6, + "learning_rate": 4.793607745349697e-06, + "loss": 1.105, + "step": 2869 + }, + { + "epoch": 0.6, + "learning_rate": 4.792819372493494e-06, + "loss": 0.9857, + "step": 2870 + }, + { + "epoch": 0.6, + "learning_rate": 4.7920308069989764e-06, + "loss": 1.1532, + "step": 2871 + }, + { + "epoch": 0.6, + "learning_rate": 4.7912420489508775e-06, + "loss": 1.0268, + "step": 2872 + }, + { + "epoch": 0.6, + "learning_rate": 4.790453098433947e-06, + "loss": 0.8412, + "step": 2873 + }, + { + "epoch": 0.6, + "learning_rate": 4.789663955532961e-06, + "loss": 0.9191, + "step": 2874 + }, + { + "epoch": 0.6, + "learning_rate": 4.788874620332708e-06, + "loss": 1.122, + "step": 2875 + }, + { + "epoch": 0.6, + "learning_rate": 4.788085092918007e-06, + "loss": 1.0056, + "step": 2876 + }, + { + "epoch": 0.6, + "learning_rate": 4.787295373373688e-06, + "loss": 1.0216, + "step": 2877 + }, + { + "epoch": 0.6, + "learning_rate": 4.786505461784611e-06, + "loss": 0.7896, + "step": 2878 + }, + { + "epoch": 0.6, + "learning_rate": 4.785715358235649e-06, + "loss": 0.9848, + "step": 2879 + }, + { + "epoch": 0.6, + "learning_rate": 4.784925062811698e-06, + "loss": 0.8343, + "step": 2880 + }, + { + "epoch": 0.6, + "learning_rate": 4.784134575597677e-06, + "loss": 1.0993, + "step": 2881 + }, + { + "epoch": 0.6, + "learning_rate": 4.783343896678524e-06, + "loss": 0.9762, + "step": 2882 + }, + { + "epoch": 0.6, + "learning_rate": 4.782553026139195e-06, + "loss": 0.7929, + "step": 2883 + }, + { + "epoch": 0.6, + "learning_rate": 4.781761964064672e-06, + "loss": 1.1848, + "step": 2884 + }, + { + "epoch": 0.6, + "learning_rate": 4.7809707105399525e-06, + "loss": 0.9826, + "step": 2885 + }, + { + "epoch": 0.6, + "learning_rate": 4.780179265650057e-06, + "loss": 1.0781, + "step": 2886 + }, + { + "epoch": 0.6, + "learning_rate": 4.7793876294800265e-06, + "loss": 1.1268, + "step": 2887 + }, + { + "epoch": 0.6, + "learning_rate": 4.778595802114923e-06, + "loss": 0.9998, + "step": 2888 + }, + { + "epoch": 0.6, + "learning_rate": 4.777803783639827e-06, + "loss": 0.92, + "step": 2889 + }, + { + "epoch": 0.6, + "learning_rate": 4.777011574139842e-06, + "loss": 1.0166, + "step": 2890 + }, + { + "epoch": 0.6, + "learning_rate": 4.776219173700088e-06, + "loss": 0.9029, + "step": 2891 + }, + { + "epoch": 0.6, + "learning_rate": 4.775426582405713e-06, + "loss": 0.999, + "step": 2892 + }, + { + "epoch": 0.6, + "learning_rate": 4.774633800341877e-06, + "loss": 1.1518, + "step": 2893 + }, + { + "epoch": 0.6, + "learning_rate": 4.773840827593765e-06, + "loss": 0.9275, + "step": 2894 + }, + { + "epoch": 0.6, + "learning_rate": 4.773047664246584e-06, + "loss": 0.9437, + "step": 2895 + }, + { + "epoch": 0.6, + "learning_rate": 4.772254310385557e-06, + "loss": 1.0086, + "step": 2896 + }, + { + "epoch": 0.6, + "learning_rate": 4.7714607660959326e-06, + "loss": 0.7944, + "step": 2897 + }, + { + "epoch": 0.6, + "learning_rate": 4.770667031462974e-06, + "loss": 0.9558, + "step": 2898 + }, + { + "epoch": 0.6, + "learning_rate": 4.7698731065719695e-06, + "loss": 1.0307, + "step": 2899 + }, + { + "epoch": 0.6, + "learning_rate": 4.769078991508226e-06, + "loss": 0.9667, + "step": 2900 + }, + { + "epoch": 0.6, + "learning_rate": 4.76828468635707e-06, + "loss": 0.9969, + "step": 2901 + }, + { + "epoch": 0.6, + "learning_rate": 4.767490191203853e-06, + "loss": 1.0142, + "step": 2902 + }, + { + "epoch": 0.6, + "learning_rate": 4.7666955061339405e-06, + "loss": 0.8911, + "step": 2903 + }, + { + "epoch": 0.6, + "learning_rate": 4.7659006312327204e-06, + "loss": 1.0494, + "step": 2904 + }, + { + "epoch": 0.6, + "learning_rate": 4.765105566585604e-06, + "loss": 1.0229, + "step": 2905 + }, + { + "epoch": 0.6, + "learning_rate": 4.76431031227802e-06, + "loss": 1.0967, + "step": 2906 + }, + { + "epoch": 0.6, + "learning_rate": 4.763514868395421e-06, + "loss": 0.8342, + "step": 2907 + }, + { + "epoch": 0.6, + "learning_rate": 4.762719235023274e-06, + "loss": 1.0053, + "step": 2908 + }, + { + "epoch": 0.61, + "learning_rate": 4.76192341224707e-06, + "loss": 0.8375, + "step": 2909 + }, + { + "epoch": 0.61, + "learning_rate": 4.761127400152321e-06, + "loss": 0.9901, + "step": 2910 + }, + { + "epoch": 0.61, + "learning_rate": 4.76033119882456e-06, + "loss": 0.9149, + "step": 2911 + }, + { + "epoch": 0.61, + "learning_rate": 4.759534808349337e-06, + "loss": 1.0135, + "step": 2912 + }, + { + "epoch": 0.61, + "learning_rate": 4.758738228812224e-06, + "loss": 0.8248, + "step": 2913 + }, + { + "epoch": 0.61, + "learning_rate": 4.7579414602988144e-06, + "loss": 0.9157, + "step": 2914 + }, + { + "epoch": 0.61, + "learning_rate": 4.75714450289472e-06, + "loss": 0.9965, + "step": 2915 + }, + { + "epoch": 0.61, + "learning_rate": 4.756347356685573e-06, + "loss": 0.8312, + "step": 2916 + }, + { + "epoch": 0.61, + "learning_rate": 4.7555500217570295e-06, + "loss": 0.9384, + "step": 2917 + }, + { + "epoch": 0.61, + "learning_rate": 4.75475249819476e-06, + "loss": 0.8476, + "step": 2918 + }, + { + "epoch": 0.61, + "learning_rate": 4.753954786084462e-06, + "loss": 1.0674, + "step": 2919 + }, + { + "epoch": 0.61, + "learning_rate": 4.753156885511845e-06, + "loss": 0.8742, + "step": 2920 + }, + { + "epoch": 0.61, + "learning_rate": 4.752358796562647e-06, + "loss": 0.8825, + "step": 2921 + }, + { + "epoch": 0.61, + "learning_rate": 4.751560519322621e-06, + "loss": 0.956, + "step": 2922 + }, + { + "epoch": 0.61, + "learning_rate": 4.750762053877543e-06, + "loss": 0.9224, + "step": 2923 + }, + { + "epoch": 0.61, + "learning_rate": 4.749963400313206e-06, + "loss": 0.9427, + "step": 2924 + }, + { + "epoch": 0.61, + "learning_rate": 4.749164558715427e-06, + "loss": 1.0677, + "step": 2925 + }, + { + "epoch": 0.61, + "learning_rate": 4.748365529170041e-06, + "loss": 0.8543, + "step": 2926 + }, + { + "epoch": 0.61, + "learning_rate": 4.7475663117629035e-06, + "loss": 0.8991, + "step": 2927 + }, + { + "epoch": 0.61, + "learning_rate": 4.746766906579891e-06, + "loss": 0.94, + "step": 2928 + }, + { + "epoch": 0.61, + "learning_rate": 4.745967313706898e-06, + "loss": 0.7597, + "step": 2929 + }, + { + "epoch": 0.61, + "learning_rate": 4.745167533229843e-06, + "loss": 0.8865, + "step": 2930 + }, + { + "epoch": 0.61, + "learning_rate": 4.74436756523466e-06, + "loss": 0.9761, + "step": 2931 + }, + { + "epoch": 0.61, + "learning_rate": 4.7435674098073076e-06, + "loss": 0.7734, + "step": 2932 + }, + { + "epoch": 0.61, + "learning_rate": 4.742767067033761e-06, + "loss": 0.9891, + "step": 2933 + }, + { + "epoch": 0.61, + "learning_rate": 4.7419665370000175e-06, + "loss": 0.8091, + "step": 2934 + }, + { + "epoch": 0.61, + "learning_rate": 4.741165819792094e-06, + "loss": 0.9532, + "step": 2935 + }, + { + "epoch": 0.61, + "learning_rate": 4.740364915496026e-06, + "loss": 1.1199, + "step": 2936 + }, + { + "epoch": 0.61, + "learning_rate": 4.739563824197874e-06, + "loss": 0.8885, + "step": 2937 + }, + { + "epoch": 0.61, + "learning_rate": 4.738762545983712e-06, + "loss": 0.7341, + "step": 2938 + }, + { + "epoch": 0.61, + "learning_rate": 4.7379610809396395e-06, + "loss": 0.7823, + "step": 2939 + }, + { + "epoch": 0.61, + "learning_rate": 4.737159429151772e-06, + "loss": 1.0084, + "step": 2940 + }, + { + "epoch": 0.61, + "learning_rate": 4.736357590706248e-06, + "loss": 0.9854, + "step": 2941 + }, + { + "epoch": 0.61, + "learning_rate": 4.7355555656892244e-06, + "loss": 0.846, + "step": 2942 + }, + { + "epoch": 0.61, + "learning_rate": 4.734753354186879e-06, + "loss": 1.0073, + "step": 2943 + }, + { + "epoch": 0.61, + "learning_rate": 4.733950956285411e-06, + "loss": 0.9657, + "step": 2944 + }, + { + "epoch": 0.61, + "learning_rate": 4.733148372071036e-06, + "loss": 0.8099, + "step": 2945 + }, + { + "epoch": 0.61, + "learning_rate": 4.732345601629991e-06, + "loss": 1.008, + "step": 2946 + }, + { + "epoch": 0.61, + "learning_rate": 4.731542645048536e-06, + "loss": 0.9804, + "step": 2947 + }, + { + "epoch": 0.61, + "learning_rate": 4.730739502412946e-06, + "loss": 0.8989, + "step": 2948 + }, + { + "epoch": 0.61, + "learning_rate": 4.7299361738095205e-06, + "loss": 1.0996, + "step": 2949 + }, + { + "epoch": 0.61, + "learning_rate": 4.729132659324577e-06, + "loss": 0.8881, + "step": 2950 + }, + { + "epoch": 0.61, + "learning_rate": 4.728328959044453e-06, + "loss": 1.1627, + "step": 2951 + }, + { + "epoch": 0.61, + "learning_rate": 4.727525073055505e-06, + "loss": 0.9498, + "step": 2952 + }, + { + "epoch": 0.61, + "learning_rate": 4.726721001444111e-06, + "loss": 0.7894, + "step": 2953 + }, + { + "epoch": 0.61, + "learning_rate": 4.725916744296669e-06, + "loss": 0.8205, + "step": 2954 + }, + { + "epoch": 0.61, + "learning_rate": 4.725112301699595e-06, + "loss": 0.905, + "step": 2955 + }, + { + "epoch": 0.61, + "learning_rate": 4.7243076737393285e-06, + "loss": 0.9272, + "step": 2956 + }, + { + "epoch": 0.62, + "learning_rate": 4.723502860502325e-06, + "loss": 1.0368, + "step": 2957 + }, + { + "epoch": 0.62, + "learning_rate": 4.722697862075062e-06, + "loss": 0.7315, + "step": 2958 + }, + { + "epoch": 0.62, + "learning_rate": 4.721892678544038e-06, + "loss": 0.8794, + "step": 2959 + }, + { + "epoch": 0.62, + "learning_rate": 4.721087309995766e-06, + "loss": 1.0733, + "step": 2960 + }, + { + "epoch": 0.62, + "learning_rate": 4.720281756516787e-06, + "loss": 0.9513, + "step": 2961 + }, + { + "epoch": 0.62, + "learning_rate": 4.719476018193657e-06, + "loss": 0.9955, + "step": 2962 + }, + { + "epoch": 0.62, + "learning_rate": 4.71867009511295e-06, + "loss": 0.8348, + "step": 2963 + }, + { + "epoch": 0.62, + "learning_rate": 4.717863987361265e-06, + "loss": 1.0419, + "step": 2964 + }, + { + "epoch": 0.62, + "learning_rate": 4.717057695025215e-06, + "loss": 0.9154, + "step": 2965 + }, + { + "epoch": 0.62, + "learning_rate": 4.71625121819144e-06, + "loss": 0.913, + "step": 2966 + }, + { + "epoch": 0.62, + "learning_rate": 4.715444556946594e-06, + "loss": 0.9571, + "step": 2967 + }, + { + "epoch": 0.62, + "learning_rate": 4.714637711377352e-06, + "loss": 1.1672, + "step": 2968 + }, + { + "epoch": 0.62, + "learning_rate": 4.713830681570411e-06, + "loss": 1.0206, + "step": 2969 + }, + { + "epoch": 0.62, + "learning_rate": 4.713023467612485e-06, + "loss": 0.8163, + "step": 2970 + }, + { + "epoch": 0.62, + "learning_rate": 4.71221606959031e-06, + "loss": 0.7672, + "step": 2971 + }, + { + "epoch": 0.62, + "learning_rate": 4.711408487590641e-06, + "loss": 0.9236, + "step": 2972 + }, + { + "epoch": 0.62, + "learning_rate": 4.710600721700251e-06, + "loss": 0.8967, + "step": 2973 + }, + { + "epoch": 0.62, + "learning_rate": 4.709792772005937e-06, + "loss": 1.0023, + "step": 2974 + }, + { + "epoch": 0.62, + "learning_rate": 4.708984638594509e-06, + "loss": 1.0547, + "step": 2975 + }, + { + "epoch": 0.62, + "learning_rate": 4.708176321552806e-06, + "loss": 0.9954, + "step": 2976 + }, + { + "epoch": 0.62, + "learning_rate": 4.707367820967678e-06, + "loss": 0.8708, + "step": 2977 + }, + { + "epoch": 0.62, + "learning_rate": 4.706559136926001e-06, + "loss": 0.8108, + "step": 2978 + }, + { + "epoch": 0.62, + "learning_rate": 4.705750269514665e-06, + "loss": 0.8816, + "step": 2979 + }, + { + "epoch": 0.62, + "learning_rate": 4.704941218820585e-06, + "loss": 0.8904, + "step": 2980 + }, + { + "epoch": 0.62, + "learning_rate": 4.704131984930691e-06, + "loss": 1.1285, + "step": 2981 + }, + { + "epoch": 0.62, + "learning_rate": 4.703322567931939e-06, + "loss": 0.8329, + "step": 2982 + }, + { + "epoch": 0.62, + "learning_rate": 4.702512967911298e-06, + "loss": 1.0239, + "step": 2983 + }, + { + "epoch": 0.62, + "learning_rate": 4.7017031849557606e-06, + "loss": 0.927, + "step": 2984 + }, + { + "epoch": 0.62, + "learning_rate": 4.700893219152336e-06, + "loss": 1.2445, + "step": 2985 + }, + { + "epoch": 0.62, + "learning_rate": 4.700083070588057e-06, + "loss": 1.2013, + "step": 2986 + }, + { + "epoch": 0.62, + "learning_rate": 4.699272739349973e-06, + "loss": 0.9241, + "step": 2987 + }, + { + "epoch": 0.62, + "learning_rate": 4.698462225525156e-06, + "loss": 1.0359, + "step": 2988 + }, + { + "epoch": 0.62, + "learning_rate": 4.697651529200692e-06, + "loss": 0.9663, + "step": 2989 + }, + { + "epoch": 0.62, + "learning_rate": 4.696840650463693e-06, + "loss": 0.9415, + "step": 2990 + }, + { + "epoch": 0.62, + "learning_rate": 4.696029589401288e-06, + "loss": 0.9096, + "step": 2991 + }, + { + "epoch": 0.62, + "learning_rate": 4.695218346100625e-06, + "loss": 1.1242, + "step": 2992 + }, + { + "epoch": 0.62, + "learning_rate": 4.69440692064887e-06, + "loss": 0.8452, + "step": 2993 + }, + { + "epoch": 0.62, + "learning_rate": 4.693595313133213e-06, + "loss": 0.95, + "step": 2994 + }, + { + "epoch": 0.62, + "learning_rate": 4.692783523640861e-06, + "loss": 1.119, + "step": 2995 + }, + { + "epoch": 0.62, + "learning_rate": 4.69197155225904e-06, + "loss": 0.8879, + "step": 2996 + }, + { + "epoch": 0.62, + "learning_rate": 4.691159399074995e-06, + "loss": 1.0193, + "step": 2997 + }, + { + "epoch": 0.62, + "learning_rate": 4.690347064175995e-06, + "loss": 0.9862, + "step": 2998 + }, + { + "epoch": 0.62, + "learning_rate": 4.6895345476493226e-06, + "loss": 0.9465, + "step": 2999 + }, + { + "epoch": 0.62, + "learning_rate": 4.688721849582285e-06, + "loss": 0.8747, + "step": 3000 + }, + { + "epoch": 0.62, + "eval_loss": NaN, + "eval_runtime": 15.0279, + "eval_samples_per_second": 352.344, + "eval_steps_per_second": 44.051, + "step": 3000 + }, + { + "epoch": 0.62, + "learning_rate": 4.687908970062204e-06, + "loss": 0.9518, + "step": 3001 + }, + { + "epoch": 0.62, + "learning_rate": 4.687095909176424e-06, + "loss": 0.9898, + "step": 3002 + }, + { + "epoch": 0.62, + "learning_rate": 4.68628266701231e-06, + "loss": 0.8518, + "step": 3003 + }, + { + "epoch": 0.62, + "learning_rate": 4.685469243657244e-06, + "loss": 0.7503, + "step": 3004 + }, + { + "epoch": 0.62, + "learning_rate": 4.684655639198627e-06, + "loss": 0.7467, + "step": 3005 + }, + { + "epoch": 0.63, + "learning_rate": 4.683841853723883e-06, + "loss": 0.8044, + "step": 3006 + }, + { + "epoch": 0.63, + "learning_rate": 4.68302788732045e-06, + "loss": 0.8377, + "step": 3007 + }, + { + "epoch": 0.63, + "learning_rate": 4.6822137400757935e-06, + "loss": 0.8644, + "step": 3008 + }, + { + "epoch": 0.63, + "learning_rate": 4.681399412077388e-06, + "loss": 0.8762, + "step": 3009 + }, + { + "epoch": 0.63, + "learning_rate": 4.680584903412738e-06, + "loss": 1.2594, + "step": 3010 + }, + { + "epoch": 0.63, + "learning_rate": 4.679770214169358e-06, + "loss": 0.986, + "step": 3011 + }, + { + "epoch": 0.63, + "learning_rate": 4.67895534443479e-06, + "loss": 0.9514, + "step": 3012 + }, + { + "epoch": 0.63, + "learning_rate": 4.678140294296588e-06, + "loss": 0.9434, + "step": 3013 + }, + { + "epoch": 0.63, + "learning_rate": 4.677325063842332e-06, + "loss": 0.9443, + "step": 3014 + }, + { + "epoch": 0.63, + "learning_rate": 4.6765096531596185e-06, + "loss": 0.968, + "step": 3015 + }, + { + "epoch": 0.63, + "learning_rate": 4.675694062336062e-06, + "loss": 1.0099, + "step": 3016 + }, + { + "epoch": 0.63, + "learning_rate": 4.674878291459297e-06, + "loss": 1.0249, + "step": 3017 + }, + { + "epoch": 0.63, + "learning_rate": 4.6740623406169805e-06, + "loss": 0.8514, + "step": 3018 + }, + { + "epoch": 0.63, + "learning_rate": 4.673246209896784e-06, + "loss": 1.0592, + "step": 3019 + }, + { + "epoch": 0.63, + "learning_rate": 4.6724298993864025e-06, + "loss": 0.9291, + "step": 3020 + }, + { + "epoch": 0.63, + "learning_rate": 4.671613409173547e-06, + "loss": 0.8889, + "step": 3021 + }, + { + "epoch": 0.63, + "learning_rate": 4.670796739345951e-06, + "loss": 0.9185, + "step": 3022 + }, + { + "epoch": 0.63, + "learning_rate": 4.669979889991363e-06, + "loss": 0.8975, + "step": 3023 + }, + { + "epoch": 0.63, + "learning_rate": 4.669162861197556e-06, + "loss": 0.9568, + "step": 3024 + }, + { + "epoch": 0.63, + "learning_rate": 4.668345653052318e-06, + "loss": 0.937, + "step": 3025 + }, + { + "epoch": 0.63, + "learning_rate": 4.667528265643459e-06, + "loss": 0.8358, + "step": 3026 + }, + { + "epoch": 0.63, + "learning_rate": 4.666710699058807e-06, + "loss": 0.9835, + "step": 3027 + }, + { + "epoch": 0.63, + "learning_rate": 4.665892953386209e-06, + "loss": 0.9471, + "step": 3028 + }, + { + "epoch": 0.63, + "learning_rate": 4.665075028713532e-06, + "loss": 1.1176, + "step": 3029 + }, + { + "epoch": 0.63, + "learning_rate": 4.6642569251286625e-06, + "loss": 0.9811, + "step": 3030 + }, + { + "epoch": 0.63, + "learning_rate": 4.663438642719505e-06, + "loss": 1.0107, + "step": 3031 + }, + { + "epoch": 0.63, + "learning_rate": 4.662620181573984e-06, + "loss": 0.9262, + "step": 3032 + }, + { + "epoch": 0.63, + "learning_rate": 4.661801541780043e-06, + "loss": 1.0861, + "step": 3033 + }, + { + "epoch": 0.63, + "learning_rate": 4.660982723425644e-06, + "loss": 0.8239, + "step": 3034 + }, + { + "epoch": 0.63, + "learning_rate": 4.660163726598772e-06, + "loss": 1.2042, + "step": 3035 + }, + { + "epoch": 0.63, + "learning_rate": 4.659344551387424e-06, + "loss": 1.0402, + "step": 3036 + }, + { + "epoch": 0.63, + "learning_rate": 4.6585251978796244e-06, + "loss": 1.019, + "step": 3037 + }, + { + "epoch": 0.63, + "learning_rate": 4.657705666163408e-06, + "loss": 1.0939, + "step": 3038 + }, + { + "epoch": 0.63, + "learning_rate": 4.656885956326839e-06, + "loss": 0.9064, + "step": 3039 + }, + { + "epoch": 0.63, + "learning_rate": 4.65606606845799e-06, + "loss": 0.9934, + "step": 3040 + }, + { + "epoch": 0.63, + "learning_rate": 4.655246002644962e-06, + "loss": 1.0363, + "step": 3041 + }, + { + "epoch": 0.63, + "learning_rate": 4.654425758975868e-06, + "loss": 0.9093, + "step": 3042 + }, + { + "epoch": 0.63, + "learning_rate": 4.653605337538843e-06, + "loss": 1.0595, + "step": 3043 + }, + { + "epoch": 0.63, + "learning_rate": 4.652784738422044e-06, + "loss": 0.9243, + "step": 3044 + }, + { + "epoch": 0.63, + "learning_rate": 4.651963961713643e-06, + "loss": 1.0185, + "step": 3045 + }, + { + "epoch": 0.63, + "learning_rate": 4.651143007501832e-06, + "loss": 0.8653, + "step": 3046 + }, + { + "epoch": 0.63, + "learning_rate": 4.650321875874822e-06, + "loss": 0.958, + "step": 3047 + }, + { + "epoch": 0.63, + "learning_rate": 4.649500566920845e-06, + "loss": 0.8905, + "step": 3048 + }, + { + "epoch": 0.63, + "learning_rate": 4.64867908072815e-06, + "loss": 0.9626, + "step": 3049 + }, + { + "epoch": 0.63, + "learning_rate": 4.647857417385004e-06, + "loss": 0.8212, + "step": 3050 + }, + { + "epoch": 0.63, + "learning_rate": 4.647035576979698e-06, + "loss": 0.8393, + "step": 3051 + }, + { + "epoch": 0.63, + "learning_rate": 4.646213559600535e-06, + "loss": 0.9107, + "step": 3052 + }, + { + "epoch": 0.63, + "learning_rate": 4.645391365335843e-06, + "loss": 0.8557, + "step": 3053 + }, + { + "epoch": 0.64, + "learning_rate": 4.644568994273967e-06, + "loss": 1.0617, + "step": 3054 + }, + { + "epoch": 0.64, + "learning_rate": 4.643746446503269e-06, + "loss": 0.9377, + "step": 3055 + }, + { + "epoch": 0.64, + "learning_rate": 4.642923722112133e-06, + "loss": 1.049, + "step": 3056 + }, + { + "epoch": 0.64, + "learning_rate": 4.642100821188961e-06, + "loss": 0.8781, + "step": 3057 + }, + { + "epoch": 0.64, + "learning_rate": 4.641277743822171e-06, + "loss": 0.8521, + "step": 3058 + }, + { + "epoch": 0.64, + "learning_rate": 4.640454490100206e-06, + "loss": 0.9214, + "step": 3059 + }, + { + "epoch": 0.64, + "learning_rate": 4.639631060111523e-06, + "loss": 0.9106, + "step": 3060 + }, + { + "epoch": 0.64, + "learning_rate": 4.6388074539445995e-06, + "loss": 1.089, + "step": 3061 + }, + { + "epoch": 0.64, + "learning_rate": 4.637983671687931e-06, + "loss": 0.891, + "step": 3062 + }, + { + "epoch": 0.64, + "learning_rate": 4.637159713430035e-06, + "loss": 0.915, + "step": 3063 + }, + { + "epoch": 0.64, + "learning_rate": 4.636335579259445e-06, + "loss": 1.1608, + "step": 3064 + }, + { + "epoch": 0.64, + "learning_rate": 4.635511269264714e-06, + "loss": 1.0218, + "step": 3065 + }, + { + "epoch": 0.64, + "learning_rate": 4.634686783534412e-06, + "loss": 1.1135, + "step": 3066 + }, + { + "epoch": 0.64, + "learning_rate": 4.633862122157133e-06, + "loss": 0.9779, + "step": 3067 + }, + { + "epoch": 0.64, + "learning_rate": 4.633037285221485e-06, + "loss": 0.9657, + "step": 3068 + }, + { + "epoch": 0.64, + "learning_rate": 4.632212272816098e-06, + "loss": 0.8419, + "step": 3069 + }, + { + "epoch": 0.64, + "learning_rate": 4.631387085029618e-06, + "loss": 0.9165, + "step": 3070 + }, + { + "epoch": 0.64, + "learning_rate": 4.630561721950713e-06, + "loss": 0.8944, + "step": 3071 + }, + { + "epoch": 0.64, + "learning_rate": 4.629736183668067e-06, + "loss": 0.9364, + "step": 3072 + }, + { + "epoch": 0.64, + "learning_rate": 4.628910470270384e-06, + "loss": 0.8593, + "step": 3073 + }, + { + "epoch": 0.64, + "learning_rate": 4.628084581846387e-06, + "loss": 0.9705, + "step": 3074 + }, + { + "epoch": 0.64, + "learning_rate": 4.627258518484819e-06, + "loss": 0.9321, + "step": 3075 + }, + { + "epoch": 0.64, + "learning_rate": 4.626432280274438e-06, + "loss": 0.7933, + "step": 3076 + }, + { + "epoch": 0.64, + "learning_rate": 4.625605867304026e-06, + "loss": 1.0441, + "step": 3077 + }, + { + "epoch": 0.64, + "learning_rate": 4.624779279662378e-06, + "loss": 1.0429, + "step": 3078 + }, + { + "epoch": 0.64, + "learning_rate": 4.623952517438313e-06, + "loss": 0.9669, + "step": 3079 + }, + { + "epoch": 0.64, + "learning_rate": 4.623125580720666e-06, + "loss": 0.9869, + "step": 3080 + }, + { + "epoch": 0.64, + "learning_rate": 4.622298469598292e-06, + "loss": 0.7866, + "step": 3081 + }, + { + "epoch": 0.64, + "learning_rate": 4.6214711841600615e-06, + "loss": 0.875, + "step": 3082 + }, + { + "epoch": 0.64, + "learning_rate": 4.620643724494869e-06, + "loss": 0.8939, + "step": 3083 + }, + { + "epoch": 0.64, + "learning_rate": 4.6198160906916235e-06, + "loss": 1.1856, + "step": 3084 + }, + { + "epoch": 0.64, + "learning_rate": 4.6189882828392544e-06, + "loss": 0.9544, + "step": 3085 + }, + { + "epoch": 0.64, + "learning_rate": 4.61816030102671e-06, + "loss": 0.7741, + "step": 3086 + }, + { + "epoch": 0.64, + "learning_rate": 4.6173321453429576e-06, + "loss": 0.9803, + "step": 3087 + }, + { + "epoch": 0.64, + "learning_rate": 4.61650381587698e-06, + "loss": 1.0927, + "step": 3088 + }, + { + "epoch": 0.64, + "learning_rate": 4.615675312717784e-06, + "loss": 0.7975, + "step": 3089 + }, + { + "epoch": 0.64, + "learning_rate": 4.614846635954391e-06, + "loss": 1.1274, + "step": 3090 + }, + { + "epoch": 0.64, + "learning_rate": 4.614017785675842e-06, + "loss": 0.8602, + "step": 3091 + }, + { + "epoch": 0.64, + "learning_rate": 4.6131887619711965e-06, + "loss": 0.9268, + "step": 3092 + }, + { + "epoch": 0.64, + "learning_rate": 4.612359564929534e-06, + "loss": 0.9117, + "step": 3093 + }, + { + "epoch": 0.64, + "learning_rate": 4.611530194639952e-06, + "loss": 0.8501, + "step": 3094 + }, + { + "epoch": 0.64, + "learning_rate": 4.610700651191564e-06, + "loss": 0.8825, + "step": 3095 + }, + { + "epoch": 0.64, + "learning_rate": 4.609870934673507e-06, + "loss": 0.9283, + "step": 3096 + }, + { + "epoch": 0.64, + "learning_rate": 4.609041045174933e-06, + "loss": 0.7831, + "step": 3097 + }, + { + "epoch": 0.64, + "learning_rate": 4.608210982785014e-06, + "loss": 1.0002, + "step": 3098 + }, + { + "epoch": 0.64, + "learning_rate": 4.607380747592938e-06, + "loss": 0.898, + "step": 3099 + }, + { + "epoch": 0.64, + "learning_rate": 4.606550339687917e-06, + "loss": 0.9183, + "step": 3100 + }, + { + "epoch": 0.64, + "learning_rate": 4.605719759159176e-06, + "loss": 0.8782, + "step": 3101 + }, + { + "epoch": 0.65, + "learning_rate": 4.604889006095962e-06, + "loss": 1.0765, + "step": 3102 + }, + { + "epoch": 0.65, + "learning_rate": 4.604058080587538e-06, + "loss": 0.7357, + "step": 3103 + }, + { + "epoch": 0.65, + "learning_rate": 4.603226982723188e-06, + "loss": 1.0128, + "step": 3104 + }, + { + "epoch": 0.65, + "learning_rate": 4.602395712592213e-06, + "loss": 0.9319, + "step": 3105 + }, + { + "epoch": 0.65, + "learning_rate": 4.601564270283933e-06, + "loss": 1.1112, + "step": 3106 + }, + { + "epoch": 0.65, + "learning_rate": 4.600732655887687e-06, + "loss": 0.8437, + "step": 3107 + }, + { + "epoch": 0.65, + "learning_rate": 4.599900869492831e-06, + "loss": 0.9436, + "step": 3108 + }, + { + "epoch": 0.65, + "learning_rate": 4.59906891118874e-06, + "loss": 0.7562, + "step": 3109 + }, + { + "epoch": 0.65, + "learning_rate": 4.59823678106481e-06, + "loss": 0.9718, + "step": 3110 + }, + { + "epoch": 0.65, + "learning_rate": 4.597404479210451e-06, + "loss": 1.0037, + "step": 3111 + }, + { + "epoch": 0.65, + "learning_rate": 4.596572005715095e-06, + "loss": 0.8704, + "step": 3112 + }, + { + "epoch": 0.65, + "learning_rate": 4.595739360668191e-06, + "loss": 0.8347, + "step": 3113 + }, + { + "epoch": 0.65, + "learning_rate": 4.5949065441592065e-06, + "loss": 0.9201, + "step": 3114 + }, + { + "epoch": 0.65, + "learning_rate": 4.594073556277627e-06, + "loss": 0.7518, + "step": 3115 + }, + { + "epoch": 0.65, + "learning_rate": 4.59324039711296e-06, + "loss": 1.0214, + "step": 3116 + }, + { + "epoch": 0.65, + "learning_rate": 4.592407066754723e-06, + "loss": 0.898, + "step": 3117 + }, + { + "epoch": 0.65, + "learning_rate": 4.591573565292462e-06, + "loss": 0.8909, + "step": 3118 + }, + { + "epoch": 0.65, + "learning_rate": 4.590739892815734e-06, + "loss": 1.0544, + "step": 3119 + }, + { + "epoch": 0.65, + "learning_rate": 4.589906049414119e-06, + "loss": 0.9706, + "step": 3120 + }, + { + "epoch": 0.65, + "learning_rate": 4.589072035177211e-06, + "loss": 1.0674, + "step": 3121 + }, + { + "epoch": 0.65, + "learning_rate": 4.588237850194627e-06, + "loss": 0.9353, + "step": 3122 + }, + { + "epoch": 0.65, + "learning_rate": 4.5874034945560006e-06, + "loss": 0.9377, + "step": 3123 + }, + { + "epoch": 0.65, + "learning_rate": 4.58656896835098e-06, + "loss": 0.9675, + "step": 3124 + }, + { + "epoch": 0.65, + "learning_rate": 4.585734271669238e-06, + "loss": 1.1243, + "step": 3125 + }, + { + "epoch": 0.65, + "learning_rate": 4.5848994046004615e-06, + "loss": 1.1148, + "step": 3126 + }, + { + "epoch": 0.65, + "learning_rate": 4.584064367234358e-06, + "loss": 1.0847, + "step": 3127 + }, + { + "epoch": 0.65, + "learning_rate": 4.58322915966065e-06, + "loss": 0.8938, + "step": 3128 + }, + { + "epoch": 0.65, + "learning_rate": 4.5823937819690825e-06, + "loss": 0.8224, + "step": 3129 + }, + { + "epoch": 0.65, + "learning_rate": 4.581558234249416e-06, + "loss": 0.8603, + "step": 3130 + }, + { + "epoch": 0.65, + "learning_rate": 4.580722516591429e-06, + "loss": 1.0142, + "step": 3131 + }, + { + "epoch": 0.65, + "learning_rate": 4.579886629084921e-06, + "loss": 0.8956, + "step": 3132 + }, + { + "epoch": 0.65, + "learning_rate": 4.579050571819707e-06, + "loss": 1.2492, + "step": 3133 + }, + { + "epoch": 0.65, + "learning_rate": 4.578214344885623e-06, + "loss": 0.9579, + "step": 3134 + }, + { + "epoch": 0.65, + "learning_rate": 4.57737794837252e-06, + "loss": 1.1407, + "step": 3135 + }, + { + "epoch": 0.65, + "learning_rate": 4.576541382370268e-06, + "loss": 0.8865, + "step": 3136 + }, + { + "epoch": 0.65, + "learning_rate": 4.5757046469687586e-06, + "loss": 0.8387, + "step": 3137 + }, + { + "epoch": 0.65, + "learning_rate": 4.574867742257895e-06, + "loss": 0.9164, + "step": 3138 + }, + { + "epoch": 0.65, + "learning_rate": 4.5740306683276065e-06, + "loss": 0.9939, + "step": 3139 + }, + { + "epoch": 0.65, + "learning_rate": 4.573193425267835e-06, + "loss": 0.7047, + "step": 3140 + }, + { + "epoch": 0.65, + "learning_rate": 4.572356013168542e-06, + "loss": 0.9988, + "step": 3141 + }, + { + "epoch": 0.65, + "learning_rate": 4.571518432119707e-06, + "loss": 1.0334, + "step": 3142 + }, + { + "epoch": 0.65, + "learning_rate": 4.57068068221133e-06, + "loss": 1.0043, + "step": 3143 + }, + { + "epoch": 0.65, + "learning_rate": 4.569842763533424e-06, + "loss": 1.0353, + "step": 3144 + }, + { + "epoch": 0.65, + "learning_rate": 4.569004676176026e-06, + "loss": 0.8973, + "step": 3145 + }, + { + "epoch": 0.65, + "learning_rate": 4.568166420229188e-06, + "loss": 0.7469, + "step": 3146 + }, + { + "epoch": 0.65, + "learning_rate": 4.567327995782979e-06, + "loss": 0.8095, + "step": 3147 + }, + { + "epoch": 0.65, + "learning_rate": 4.56648940292749e-06, + "loss": 0.8342, + "step": 3148 + }, + { + "epoch": 0.65, + "learning_rate": 4.565650641752825e-06, + "loss": 1.0895, + "step": 3149 + }, + { + "epoch": 0.66, + "learning_rate": 4.5648117123491114e-06, + "loss": 0.8801, + "step": 3150 + }, + { + "epoch": 0.66, + "learning_rate": 4.56397261480649e-06, + "loss": 0.7843, + "step": 3151 + }, + { + "epoch": 0.66, + "learning_rate": 4.563133349215124e-06, + "loss": 0.8371, + "step": 3152 + }, + { + "epoch": 0.66, + "learning_rate": 4.56229391566519e-06, + "loss": 0.8594, + "step": 3153 + }, + { + "epoch": 0.66, + "learning_rate": 4.561454314246887e-06, + "loss": 0.952, + "step": 3154 + }, + { + "epoch": 0.66, + "learning_rate": 4.560614545050428e-06, + "loss": 0.926, + "step": 3155 + }, + { + "epoch": 0.66, + "learning_rate": 4.559774608166049e-06, + "loss": 1.0369, + "step": 3156 + }, + { + "epoch": 0.66, + "learning_rate": 4.558934503683999e-06, + "loss": 0.9788, + "step": 3157 + }, + { + "epoch": 0.66, + "learning_rate": 4.558094231694548e-06, + "loss": 0.8388, + "step": 3158 + }, + { + "epoch": 0.66, + "learning_rate": 4.557253792287983e-06, + "loss": 0.9064, + "step": 3159 + }, + { + "epoch": 0.66, + "learning_rate": 4.556413185554609e-06, + "loss": 0.938, + "step": 3160 + }, + { + "epoch": 0.66, + "learning_rate": 4.55557241158475e-06, + "loss": 0.8971, + "step": 3161 + }, + { + "epoch": 0.66, + "learning_rate": 4.554731470468746e-06, + "loss": 0.7372, + "step": 3162 + }, + { + "epoch": 0.66, + "learning_rate": 4.5538903622969555e-06, + "loss": 1.1798, + "step": 3163 + }, + { + "epoch": 0.66, + "learning_rate": 4.553049087159757e-06, + "loss": 0.7614, + "step": 3164 + }, + { + "epoch": 0.66, + "learning_rate": 4.552207645147545e-06, + "loss": 1.1323, + "step": 3165 + }, + { + "epoch": 0.66, + "learning_rate": 4.551366036350733e-06, + "loss": 0.8946, + "step": 3166 + }, + { + "epoch": 0.66, + "learning_rate": 4.55052426085975e-06, + "loss": 0.9426, + "step": 3167 + }, + { + "epoch": 0.66, + "learning_rate": 4.549682318765046e-06, + "loss": 1.2226, + "step": 3168 + }, + { + "epoch": 0.66, + "learning_rate": 4.548840210157088e-06, + "loss": 1.2983, + "step": 3169 + }, + { + "epoch": 0.66, + "learning_rate": 4.54799793512636e-06, + "loss": 1.0525, + "step": 3170 + }, + { + "epoch": 0.66, + "learning_rate": 4.5471554937633635e-06, + "loss": 0.9032, + "step": 3171 + }, + { + "epoch": 0.66, + "learning_rate": 4.54631288615862e-06, + "loss": 1.0312, + "step": 3172 + }, + { + "epoch": 0.66, + "learning_rate": 4.545470112402667e-06, + "loss": 0.8113, + "step": 3173 + }, + { + "epoch": 0.66, + "learning_rate": 4.544627172586062e-06, + "loss": 0.9054, + "step": 3174 + }, + { + "epoch": 0.66, + "learning_rate": 4.5437840667993755e-06, + "loss": 1.1193, + "step": 3175 + }, + { + "epoch": 0.66, + "learning_rate": 4.542940795133202e-06, + "loss": 0.7728, + "step": 3176 + }, + { + "epoch": 0.66, + "learning_rate": 4.54209735767815e-06, + "loss": 1.144, + "step": 3177 + }, + { + "epoch": 0.66, + "learning_rate": 4.5412537545248485e-06, + "loss": 1.0828, + "step": 3178 + }, + { + "epoch": 0.66, + "learning_rate": 4.54040998576394e-06, + "loss": 0.7095, + "step": 3179 + }, + { + "epoch": 0.66, + "learning_rate": 4.539566051486088e-06, + "loss": 0.9468, + "step": 3180 + }, + { + "epoch": 0.66, + "learning_rate": 4.538721951781974e-06, + "loss": 0.7854, + "step": 3181 + }, + { + "epoch": 0.66, + "learning_rate": 4.537877686742296e-06, + "loss": 0.8796, + "step": 3182 + }, + { + "epoch": 0.66, + "learning_rate": 4.53703325645777e-06, + "loss": 0.9828, + "step": 3183 + }, + { + "epoch": 0.66, + "learning_rate": 4.536188661019131e-06, + "loss": 0.8951, + "step": 3184 + }, + { + "epoch": 0.66, + "learning_rate": 4.535343900517129e-06, + "loss": 0.997, + "step": 3185 + }, + { + "epoch": 0.66, + "learning_rate": 4.534498975042535e-06, + "loss": 0.8277, + "step": 3186 + }, + { + "epoch": 0.66, + "learning_rate": 4.533653884686136e-06, + "loss": 0.9716, + "step": 3187 + }, + { + "epoch": 0.66, + "learning_rate": 4.5328086295387366e-06, + "loss": 0.924, + "step": 3188 + }, + { + "epoch": 0.66, + "learning_rate": 4.531963209691158e-06, + "loss": 0.9054, + "step": 3189 + }, + { + "epoch": 0.66, + "learning_rate": 4.531117625234244e-06, + "loss": 0.8828, + "step": 3190 + }, + { + "epoch": 0.66, + "learning_rate": 4.530271876258849e-06, + "loss": 0.8708, + "step": 3191 + }, + { + "epoch": 0.66, + "learning_rate": 4.52942596285585e-06, + "loss": 0.9937, + "step": 3192 + }, + { + "epoch": 0.66, + "learning_rate": 4.528579885116141e-06, + "loss": 1.0573, + "step": 3193 + }, + { + "epoch": 0.66, + "learning_rate": 4.527733643130633e-06, + "loss": 0.8786, + "step": 3194 + }, + { + "epoch": 0.66, + "learning_rate": 4.526887236990252e-06, + "loss": 0.9917, + "step": 3195 + }, + { + "epoch": 0.66, + "learning_rate": 4.526040666785948e-06, + "loss": 0.8868, + "step": 3196 + }, + { + "epoch": 0.66, + "learning_rate": 4.525193932608684e-06, + "loss": 0.8511, + "step": 3197 + }, + { + "epoch": 0.67, + "learning_rate": 4.524347034549438e-06, + "loss": 0.8072, + "step": 3198 + }, + { + "epoch": 0.67, + "learning_rate": 4.523499972699214e-06, + "loss": 0.9129, + "step": 3199 + }, + { + "epoch": 0.67, + "learning_rate": 4.522652747149026e-06, + "loss": 0.8953, + "step": 3200 + }, + { + "epoch": 0.67, + "learning_rate": 4.521805357989909e-06, + "loss": 1.1848, + "step": 3201 + }, + { + "epoch": 0.67, + "learning_rate": 4.520957805312916e-06, + "loss": 0.7492, + "step": 3202 + }, + { + "epoch": 0.67, + "learning_rate": 4.5201100892091144e-06, + "loss": 1.0002, + "step": 3203 + }, + { + "epoch": 0.67, + "learning_rate": 4.519262209769591e-06, + "loss": 0.7325, + "step": 3204 + }, + { + "epoch": 0.67, + "learning_rate": 4.518414167085454e-06, + "loss": 0.8325, + "step": 3205 + }, + { + "epoch": 0.67, + "learning_rate": 4.517565961247821e-06, + "loss": 0.9757, + "step": 3206 + }, + { + "epoch": 0.67, + "learning_rate": 4.516717592347835e-06, + "loss": 0.9462, + "step": 3207 + }, + { + "epoch": 0.67, + "learning_rate": 4.515869060476651e-06, + "loss": 1.0734, + "step": 3208 + }, + { + "epoch": 0.67, + "learning_rate": 4.515020365725446e-06, + "loss": 1.0478, + "step": 3209 + }, + { + "epoch": 0.67, + "learning_rate": 4.51417150818541e-06, + "loss": 1.0423, + "step": 3210 + }, + { + "epoch": 0.67, + "learning_rate": 4.513322487947752e-06, + "loss": 1.0398, + "step": 3211 + }, + { + "epoch": 0.67, + "learning_rate": 4.512473305103703e-06, + "loss": 0.9736, + "step": 3212 + }, + { + "epoch": 0.67, + "learning_rate": 4.511623959744504e-06, + "loss": 0.8756, + "step": 3213 + }, + { + "epoch": 0.67, + "learning_rate": 4.510774451961419e-06, + "loss": 0.7763, + "step": 3214 + }, + { + "epoch": 0.67, + "learning_rate": 4.5099247818457275e-06, + "loss": 0.9699, + "step": 3215 + }, + { + "epoch": 0.67, + "learning_rate": 4.509074949488724e-06, + "loss": 0.8681, + "step": 3216 + }, + { + "epoch": 0.67, + "learning_rate": 4.508224954981727e-06, + "loss": 1.0273, + "step": 3217 + }, + { + "epoch": 0.67, + "learning_rate": 4.507374798416065e-06, + "loss": 0.989, + "step": 3218 + }, + { + "epoch": 0.67, + "learning_rate": 4.506524479883089e-06, + "loss": 1.0806, + "step": 3219 + }, + { + "epoch": 0.67, + "learning_rate": 4.505673999474164e-06, + "loss": 0.835, + "step": 3220 + }, + { + "epoch": 0.67, + "learning_rate": 4.504823357280676e-06, + "loss": 0.966, + "step": 3221 + }, + { + "epoch": 0.67, + "learning_rate": 4.503972553394026e-06, + "loss": 0.9249, + "step": 3222 + }, + { + "epoch": 0.67, + "learning_rate": 4.503121587905632e-06, + "loss": 0.9214, + "step": 3223 + }, + { + "epoch": 0.67, + "learning_rate": 4.50227046090693e-06, + "loss": 0.8363, + "step": 3224 + }, + { + "epoch": 0.67, + "learning_rate": 4.501419172489376e-06, + "loss": 0.9762, + "step": 3225 + }, + { + "epoch": 0.67, + "learning_rate": 4.500567722744438e-06, + "loss": 0.9471, + "step": 3226 + }, + { + "epoch": 0.67, + "learning_rate": 4.499716111763604e-06, + "loss": 0.9023, + "step": 3227 + }, + { + "epoch": 0.67, + "learning_rate": 4.498864339638382e-06, + "loss": 0.7317, + "step": 3228 + }, + { + "epoch": 0.67, + "learning_rate": 4.498012406460294e-06, + "loss": 1.0489, + "step": 3229 + }, + { + "epoch": 0.67, + "learning_rate": 4.49716031232088e-06, + "loss": 0.9339, + "step": 3230 + }, + { + "epoch": 0.67, + "learning_rate": 4.496308057311697e-06, + "loss": 0.8915, + "step": 3231 + }, + { + "epoch": 0.67, + "learning_rate": 4.49545564152432e-06, + "loss": 0.8793, + "step": 3232 + }, + { + "epoch": 0.67, + "learning_rate": 4.4946030650503406e-06, + "loss": 0.9988, + "step": 3233 + }, + { + "epoch": 0.67, + "learning_rate": 4.493750327981369e-06, + "loss": 0.7372, + "step": 3234 + }, + { + "epoch": 0.67, + "learning_rate": 4.4928974304090305e-06, + "loss": 1.1039, + "step": 3235 + }, + { + "epoch": 0.67, + "learning_rate": 4.492044372424971e-06, + "loss": 1.1117, + "step": 3236 + }, + { + "epoch": 0.67, + "learning_rate": 4.491191154120849e-06, + "loss": 1.0306, + "step": 3237 + }, + { + "epoch": 0.67, + "learning_rate": 4.490337775588344e-06, + "loss": 0.7722, + "step": 3238 + }, + { + "epoch": 0.67, + "learning_rate": 4.489484236919152e-06, + "loss": 0.811, + "step": 3239 + }, + { + "epoch": 0.67, + "learning_rate": 4.488630538204985e-06, + "loss": 1.3126, + "step": 3240 + }, + { + "epoch": 0.67, + "learning_rate": 4.487776679537572e-06, + "loss": 0.8606, + "step": 3241 + }, + { + "epoch": 0.67, + "learning_rate": 4.486922661008662e-06, + "loss": 0.9031, + "step": 3242 + }, + { + "epoch": 0.67, + "learning_rate": 4.486068482710017e-06, + "loss": 0.9875, + "step": 3243 + }, + { + "epoch": 0.67, + "learning_rate": 4.485214144733421e-06, + "loss": 1.0799, + "step": 3244 + }, + { + "epoch": 0.67, + "learning_rate": 4.48435964717067e-06, + "loss": 0.9219, + "step": 3245 + }, + { + "epoch": 0.68, + "learning_rate": 4.483504990113581e-06, + "loss": 0.7849, + "step": 3246 + }, + { + "epoch": 0.68, + "learning_rate": 4.482650173653987e-06, + "loss": 0.9007, + "step": 3247 + }, + { + "epoch": 0.68, + "learning_rate": 4.481795197883738e-06, + "loss": 0.8961, + "step": 3248 + }, + { + "epoch": 0.68, + "learning_rate": 4.480940062894699e-06, + "loss": 1.0088, + "step": 3249 + }, + { + "epoch": 0.68, + "learning_rate": 4.480084768778758e-06, + "loss": 1.1692, + "step": 3250 + }, + { + "epoch": 0.68, + "learning_rate": 4.479229315627813e-06, + "loss": 1.1109, + "step": 3251 + }, + { + "epoch": 0.68, + "learning_rate": 4.478373703533784e-06, + "loss": 0.778, + "step": 3252 + }, + { + "epoch": 0.68, + "learning_rate": 4.477517932588606e-06, + "loss": 0.8897, + "step": 3253 + }, + { + "epoch": 0.68, + "learning_rate": 4.476662002884232e-06, + "loss": 1.0023, + "step": 3254 + }, + { + "epoch": 0.68, + "learning_rate": 4.475805914512631e-06, + "loss": 1.1579, + "step": 3255 + }, + { + "epoch": 0.68, + "learning_rate": 4.474949667565789e-06, + "loss": 0.966, + "step": 3256 + }, + { + "epoch": 0.68, + "learning_rate": 4.474093262135711e-06, + "loss": 0.8485, + "step": 3257 + }, + { + "epoch": 0.68, + "learning_rate": 4.4732366983144175e-06, + "loss": 1.0312, + "step": 3258 + }, + { + "epoch": 0.68, + "learning_rate": 4.472379976193945e-06, + "loss": 0.9535, + "step": 3259 + }, + { + "epoch": 0.68, + "learning_rate": 4.47152309586635e-06, + "loss": 1.0468, + "step": 3260 + }, + { + "epoch": 0.68, + "learning_rate": 4.470666057423703e-06, + "loss": 0.9334, + "step": 3261 + }, + { + "epoch": 0.68, + "learning_rate": 4.469808860958094e-06, + "loss": 0.9394, + "step": 3262 + }, + { + "epoch": 0.68, + "learning_rate": 4.468951506561626e-06, + "loss": 0.8189, + "step": 3263 + }, + { + "epoch": 0.68, + "learning_rate": 4.468093994326424e-06, + "loss": 1.0347, + "step": 3264 + }, + { + "epoch": 0.68, + "learning_rate": 4.467236324344628e-06, + "loss": 0.8667, + "step": 3265 + }, + { + "epoch": 0.68, + "learning_rate": 4.466378496708393e-06, + "loss": 1.1199, + "step": 3266 + }, + { + "epoch": 0.68, + "learning_rate": 4.465520511509893e-06, + "loss": 1.2093, + "step": 3267 + }, + { + "epoch": 0.68, + "learning_rate": 4.464662368841319e-06, + "loss": 0.8026, + "step": 3268 + }, + { + "epoch": 0.68, + "learning_rate": 4.463804068794878e-06, + "loss": 1.0439, + "step": 3269 + }, + { + "epoch": 0.68, + "learning_rate": 4.462945611462794e-06, + "loss": 0.8969, + "step": 3270 + }, + { + "epoch": 0.68, + "learning_rate": 4.462086996937309e-06, + "loss": 1.3904, + "step": 3271 + }, + { + "epoch": 0.68, + "learning_rate": 4.46122822531068e-06, + "loss": 1.0246, + "step": 3272 + }, + { + "epoch": 0.68, + "learning_rate": 4.460369296675182e-06, + "loss": 0.7448, + "step": 3273 + }, + { + "epoch": 0.68, + "learning_rate": 4.459510211123108e-06, + "loss": 0.9264, + "step": 3274 + }, + { + "epoch": 0.68, + "learning_rate": 4.458650968746766e-06, + "loss": 0.8438, + "step": 3275 + }, + { + "epoch": 0.68, + "learning_rate": 4.457791569638481e-06, + "loss": 0.9985, + "step": 3276 + }, + { + "epoch": 0.68, + "learning_rate": 4.456932013890596e-06, + "loss": 0.8425, + "step": 3277 + }, + { + "epoch": 0.68, + "learning_rate": 4.456072301595471e-06, + "loss": 0.9602, + "step": 3278 + }, + { + "epoch": 0.68, + "learning_rate": 4.45521243284548e-06, + "loss": 0.791, + "step": 3279 + }, + { + "epoch": 0.68, + "learning_rate": 4.454352407733017e-06, + "loss": 1.0895, + "step": 3280 + }, + { + "epoch": 0.68, + "learning_rate": 4.453492226350491e-06, + "loss": 0.7769, + "step": 3281 + }, + { + "epoch": 0.68, + "learning_rate": 4.452631888790331e-06, + "loss": 1.0126, + "step": 3282 + }, + { + "epoch": 0.68, + "learning_rate": 4.4517713951449764e-06, + "loss": 0.8093, + "step": 3283 + }, + { + "epoch": 0.68, + "learning_rate": 4.4509107455068894e-06, + "loss": 1.1267, + "step": 3284 + }, + { + "epoch": 0.68, + "learning_rate": 4.450049939968547e-06, + "loss": 0.8313, + "step": 3285 + }, + { + "epoch": 0.68, + "learning_rate": 4.4491889786224415e-06, + "loss": 0.9571, + "step": 3286 + }, + { + "epoch": 0.68, + "learning_rate": 4.4483278615610835e-06, + "loss": 0.9878, + "step": 3287 + }, + { + "epoch": 0.68, + "learning_rate": 4.447466588877001e-06, + "loss": 0.9375, + "step": 3288 + }, + { + "epoch": 0.68, + "learning_rate": 4.4466051606627355e-06, + "loss": 1.0182, + "step": 3289 + }, + { + "epoch": 0.68, + "learning_rate": 4.445743577010849e-06, + "loss": 0.749, + "step": 3290 + }, + { + "epoch": 0.68, + "learning_rate": 4.444881838013918e-06, + "loss": 1.0061, + "step": 3291 + }, + { + "epoch": 0.68, + "learning_rate": 4.444019943764538e-06, + "loss": 0.8873, + "step": 3292 + }, + { + "epoch": 0.68, + "learning_rate": 4.443157894355318e-06, + "loss": 0.8782, + "step": 3293 + }, + { + "epoch": 0.69, + "learning_rate": 4.442295689878884e-06, + "loss": 1.0894, + "step": 3294 + }, + { + "epoch": 0.69, + "learning_rate": 4.441433330427881e-06, + "loss": 0.83, + "step": 3295 + }, + { + "epoch": 0.69, + "learning_rate": 4.4405708160949705e-06, + "loss": 1.0744, + "step": 3296 + }, + { + "epoch": 0.69, + "learning_rate": 4.439708146972828e-06, + "loss": 1.0598, + "step": 3297 + }, + { + "epoch": 0.69, + "learning_rate": 4.438845323154149e-06, + "loss": 0.8161, + "step": 3298 + }, + { + "epoch": 0.69, + "learning_rate": 4.437982344731642e-06, + "loss": 0.9913, + "step": 3299 + }, + { + "epoch": 0.69, + "learning_rate": 4.4371192117980335e-06, + "loss": 1.111, + "step": 3300 + }, + { + "epoch": 0.69, + "learning_rate": 4.4362559244460696e-06, + "loss": 1.0002, + "step": 3301 + }, + { + "epoch": 0.69, + "learning_rate": 4.435392482768508e-06, + "loss": 0.8097, + "step": 3302 + }, + { + "epoch": 0.69, + "learning_rate": 4.434528886858128e-06, + "loss": 1.0539, + "step": 3303 + }, + { + "epoch": 0.69, + "learning_rate": 4.433665136807721e-06, + "loss": 0.9239, + "step": 3304 + }, + { + "epoch": 0.69, + "learning_rate": 4.432801232710097e-06, + "loss": 1.0484, + "step": 3305 + }, + { + "epoch": 0.69, + "learning_rate": 4.431937174658083e-06, + "loss": 0.8123, + "step": 3306 + }, + { + "epoch": 0.69, + "learning_rate": 4.431072962744521e-06, + "loss": 1.0213, + "step": 3307 + }, + { + "epoch": 0.69, + "learning_rate": 4.430208597062272e-06, + "loss": 0.8136, + "step": 3308 + }, + { + "epoch": 0.69, + "learning_rate": 4.429344077704212e-06, + "loss": 0.8728, + "step": 3309 + }, + { + "epoch": 0.69, + "learning_rate": 4.428479404763231e-06, + "loss": 1.0472, + "step": 3310 + }, + { + "epoch": 0.69, + "learning_rate": 4.427614578332242e-06, + "loss": 0.8552, + "step": 3311 + }, + { + "epoch": 0.69, + "learning_rate": 4.426749598504167e-06, + "loss": 0.8032, + "step": 3312 + }, + { + "epoch": 0.69, + "learning_rate": 4.42588446537195e-06, + "loss": 0.868, + "step": 3313 + }, + { + "epoch": 0.69, + "learning_rate": 4.425019179028549e-06, + "loss": 1.0481, + "step": 3314 + }, + { + "epoch": 0.69, + "learning_rate": 4.4241537395669386e-06, + "loss": 0.997, + "step": 3315 + }, + { + "epoch": 0.69, + "learning_rate": 4.423288147080111e-06, + "loss": 1.0476, + "step": 3316 + }, + { + "epoch": 0.69, + "learning_rate": 4.422422401661072e-06, + "loss": 0.7697, + "step": 3317 + }, + { + "epoch": 0.69, + "learning_rate": 4.4215565034028485e-06, + "loss": 0.8413, + "step": 3318 + }, + { + "epoch": 0.69, + "learning_rate": 4.420690452398481e-06, + "loss": 0.9296, + "step": 3319 + }, + { + "epoch": 0.69, + "learning_rate": 4.4198242487410245e-06, + "loss": 0.9671, + "step": 3320 + }, + { + "epoch": 0.69, + "learning_rate": 4.418957892523555e-06, + "loss": 1.1275, + "step": 3321 + }, + { + "epoch": 0.69, + "learning_rate": 4.41809138383916e-06, + "loss": 1.0889, + "step": 3322 + }, + { + "epoch": 0.69, + "learning_rate": 4.417224722780948e-06, + "loss": 0.86, + "step": 3323 + }, + { + "epoch": 0.69, + "learning_rate": 4.41635790944204e-06, + "loss": 0.9845, + "step": 3324 + }, + { + "epoch": 0.69, + "learning_rate": 4.415490943915576e-06, + "loss": 0.9264, + "step": 3325 + }, + { + "epoch": 0.69, + "learning_rate": 4.414623826294711e-06, + "loss": 1.1099, + "step": 3326 + }, + { + "epoch": 0.69, + "learning_rate": 4.4137565566726165e-06, + "loss": 1.023, + "step": 3327 + }, + { + "epoch": 0.69, + "learning_rate": 4.412889135142481e-06, + "loss": 1.0623, + "step": 3328 + }, + { + "epoch": 0.69, + "learning_rate": 4.4120215617975095e-06, + "loss": 0.9416, + "step": 3329 + }, + { + "epoch": 0.69, + "learning_rate": 4.411153836730921e-06, + "loss": 0.9628, + "step": 3330 + }, + { + "epoch": 0.69, + "learning_rate": 4.410285960035953e-06, + "loss": 0.9549, + "step": 3331 + }, + { + "epoch": 0.69, + "learning_rate": 4.40941793180586e-06, + "loss": 0.8794, + "step": 3332 + }, + { + "epoch": 0.69, + "learning_rate": 4.408549752133912e-06, + "loss": 0.8903, + "step": 3333 + }, + { + "epoch": 0.69, + "learning_rate": 4.407681421113391e-06, + "loss": 1.0391, + "step": 3334 + }, + { + "epoch": 0.69, + "learning_rate": 4.406812938837604e-06, + "loss": 0.8691, + "step": 3335 + }, + { + "epoch": 0.69, + "learning_rate": 4.405944305399866e-06, + "loss": 0.9104, + "step": 3336 + }, + { + "epoch": 0.69, + "learning_rate": 4.4050755208935136e-06, + "loss": 1.0622, + "step": 3337 + }, + { + "epoch": 0.69, + "learning_rate": 4.404206585411896e-06, + "loss": 0.8936, + "step": 3338 + }, + { + "epoch": 0.69, + "learning_rate": 4.403337499048381e-06, + "loss": 0.8666, + "step": 3339 + }, + { + "epoch": 0.69, + "learning_rate": 4.402468261896352e-06, + "loss": 0.9042, + "step": 3340 + }, + { + "epoch": 0.69, + "learning_rate": 4.401598874049209e-06, + "loss": 1.068, + "step": 3341 + }, + { + "epoch": 0.7, + "learning_rate": 4.400729335600366e-06, + "loss": 0.9277, + "step": 3342 + }, + { + "epoch": 0.7, + "learning_rate": 4.399859646643256e-06, + "loss": 0.8869, + "step": 3343 + }, + { + "epoch": 0.7, + "learning_rate": 4.398989807271327e-06, + "loss": 0.6673, + "step": 3344 + }, + { + "epoch": 0.7, + "learning_rate": 4.398119817578043e-06, + "loss": 0.7153, + "step": 3345 + }, + { + "epoch": 0.7, + "learning_rate": 4.397249677656884e-06, + "loss": 1.0675, + "step": 3346 + }, + { + "epoch": 0.7, + "learning_rate": 4.3963793876013475e-06, + "loss": 0.973, + "step": 3347 + }, + { + "epoch": 0.7, + "learning_rate": 4.395508947504945e-06, + "loss": 0.9984, + "step": 3348 + }, + { + "epoch": 0.7, + "learning_rate": 4.394638357461207e-06, + "loss": 1.1425, + "step": 3349 + }, + { + "epoch": 0.7, + "learning_rate": 4.393767617563676e-06, + "loss": 0.9081, + "step": 3350 + }, + { + "epoch": 0.7, + "learning_rate": 4.392896727905913e-06, + "loss": 1.1408, + "step": 3351 + }, + { + "epoch": 0.7, + "learning_rate": 4.392025688581497e-06, + "loss": 0.867, + "step": 3352 + }, + { + "epoch": 0.7, + "learning_rate": 4.39115449968402e-06, + "loss": 1.1025, + "step": 3353 + }, + { + "epoch": 0.7, + "learning_rate": 4.390283161307091e-06, + "loss": 0.9981, + "step": 3354 + }, + { + "epoch": 0.7, + "learning_rate": 4.3894116735443366e-06, + "loss": 1.0786, + "step": 3355 + }, + { + "epoch": 0.7, + "learning_rate": 4.388540036489396e-06, + "loss": 0.8672, + "step": 3356 + }, + { + "epoch": 0.7, + "learning_rate": 4.387668250235928e-06, + "loss": 1.0751, + "step": 3357 + }, + { + "epoch": 0.7, + "learning_rate": 4.386796314877605e-06, + "loss": 0.9135, + "step": 3358 + }, + { + "epoch": 0.7, + "learning_rate": 4.385924230508117e-06, + "loss": 0.8685, + "step": 3359 + }, + { + "epoch": 0.7, + "learning_rate": 4.38505199722117e-06, + "loss": 0.9796, + "step": 3360 + }, + { + "epoch": 0.7, + "learning_rate": 4.384179615110484e-06, + "loss": 0.9074, + "step": 3361 + }, + { + "epoch": 0.7, + "learning_rate": 4.383307084269797e-06, + "loss": 1.0492, + "step": 3362 + }, + { + "epoch": 0.7, + "learning_rate": 4.382434404792863e-06, + "loss": 0.9291, + "step": 3363 + }, + { + "epoch": 0.7, + "learning_rate": 4.381561576773451e-06, + "loss": 0.8773, + "step": 3364 + }, + { + "epoch": 0.7, + "learning_rate": 4.380688600305346e-06, + "loss": 0.9222, + "step": 3365 + }, + { + "epoch": 0.7, + "learning_rate": 4.379815475482349e-06, + "loss": 0.8438, + "step": 3366 + }, + { + "epoch": 0.7, + "learning_rate": 4.378942202398278e-06, + "loss": 0.902, + "step": 3367 + }, + { + "epoch": 0.7, + "learning_rate": 4.378068781146966e-06, + "loss": 0.9851, + "step": 3368 + }, + { + "epoch": 0.7, + "learning_rate": 4.377195211822261e-06, + "loss": 0.8909, + "step": 3369 + }, + { + "epoch": 0.7, + "learning_rate": 4.37632149451803e-06, + "loss": 0.8314, + "step": 3370 + }, + { + "epoch": 0.7, + "learning_rate": 4.375447629328151e-06, + "loss": 0.8936, + "step": 3371 + }, + { + "epoch": 0.7, + "learning_rate": 4.374573616346524e-06, + "loss": 1.0566, + "step": 3372 + }, + { + "epoch": 0.7, + "learning_rate": 4.373699455667058e-06, + "loss": 0.9836, + "step": 3373 + }, + { + "epoch": 0.7, + "learning_rate": 4.372825147383686e-06, + "loss": 0.9083, + "step": 3374 + }, + { + "epoch": 0.7, + "learning_rate": 4.371950691590348e-06, + "loss": 0.9043, + "step": 3375 + }, + { + "epoch": 0.7, + "learning_rate": 4.371076088381007e-06, + "loss": 0.8193, + "step": 3376 + }, + { + "epoch": 0.7, + "learning_rate": 4.370201337849638e-06, + "loss": 1.0721, + "step": 3377 + }, + { + "epoch": 0.7, + "learning_rate": 4.369326440090234e-06, + "loss": 1.0828, + "step": 3378 + }, + { + "epoch": 0.7, + "learning_rate": 4.368451395196799e-06, + "loss": 1.0586, + "step": 3379 + }, + { + "epoch": 0.7, + "learning_rate": 4.3675762032633625e-06, + "loss": 1.0943, + "step": 3380 + }, + { + "epoch": 0.7, + "learning_rate": 4.366700864383958e-06, + "loss": 0.918, + "step": 3381 + }, + { + "epoch": 0.7, + "learning_rate": 4.365825378652645e-06, + "loss": 0.85, + "step": 3382 + }, + { + "epoch": 0.7, + "learning_rate": 4.364949746163491e-06, + "loss": 0.9193, + "step": 3383 + }, + { + "epoch": 0.7, + "learning_rate": 4.364073967010585e-06, + "loss": 0.8768, + "step": 3384 + }, + { + "epoch": 0.7, + "learning_rate": 4.363198041288028e-06, + "loss": 0.8088, + "step": 3385 + }, + { + "epoch": 0.7, + "learning_rate": 4.36232196908994e-06, + "loss": 1.0142, + "step": 3386 + }, + { + "epoch": 0.7, + "learning_rate": 4.361445750510452e-06, + "loss": 1.0501, + "step": 3387 + }, + { + "epoch": 0.7, + "learning_rate": 4.360569385643716e-06, + "loss": 0.9089, + "step": 3388 + }, + { + "epoch": 0.7, + "learning_rate": 4.359692874583897e-06, + "loss": 1.133, + "step": 3389 + }, + { + "epoch": 0.71, + "learning_rate": 4.358816217425176e-06, + "loss": 0.8858, + "step": 3390 + }, + { + "epoch": 0.71, + "learning_rate": 4.357939414261748e-06, + "loss": 1.0208, + "step": 3391 + }, + { + "epoch": 0.71, + "learning_rate": 4.357062465187828e-06, + "loss": 0.9572, + "step": 3392 + }, + { + "epoch": 0.71, + "learning_rate": 4.3561853702976414e-06, + "loss": 0.8947, + "step": 3393 + }, + { + "epoch": 0.71, + "learning_rate": 4.355308129685435e-06, + "loss": 0.9601, + "step": 3394 + }, + { + "epoch": 0.71, + "learning_rate": 4.354430743445466e-06, + "loss": 0.7621, + "step": 3395 + }, + { + "epoch": 0.71, + "learning_rate": 4.353553211672011e-06, + "loss": 0.747, + "step": 3396 + }, + { + "epoch": 0.71, + "learning_rate": 4.35267553445936e-06, + "loss": 0.808, + "step": 3397 + }, + { + "epoch": 0.71, + "learning_rate": 4.3517977119018185e-06, + "loss": 0.8968, + "step": 3398 + }, + { + "epoch": 0.71, + "learning_rate": 4.3509197440937096e-06, + "loss": 0.8673, + "step": 3399 + }, + { + "epoch": 0.71, + "learning_rate": 4.350041631129371e-06, + "loss": 0.8697, + "step": 3400 + }, + { + "epoch": 0.71, + "learning_rate": 4.349163373103155e-06, + "loss": 1.1449, + "step": 3401 + }, + { + "epoch": 0.71, + "learning_rate": 4.348284970109432e-06, + "loss": 0.8641, + "step": 3402 + }, + { + "epoch": 0.71, + "learning_rate": 4.347406422242584e-06, + "loss": 0.8337, + "step": 3403 + }, + { + "epoch": 0.71, + "learning_rate": 4.3465277295970125e-06, + "loss": 0.8524, + "step": 3404 + }, + { + "epoch": 0.71, + "learning_rate": 4.3456488922671324e-06, + "loss": 1.139, + "step": 3405 + }, + { + "epoch": 0.71, + "learning_rate": 4.344769910347375e-06, + "loss": 0.9511, + "step": 3406 + }, + { + "epoch": 0.71, + "learning_rate": 4.343890783932186e-06, + "loss": 1.0521, + "step": 3407 + }, + { + "epoch": 0.71, + "learning_rate": 4.3430115131160294e-06, + "loss": 1.0845, + "step": 3408 + }, + { + "epoch": 0.71, + "learning_rate": 4.34213209799338e-06, + "loss": 1.194, + "step": 3409 + }, + { + "epoch": 0.71, + "learning_rate": 4.341252538658734e-06, + "loss": 1.0167, + "step": 3410 + }, + { + "epoch": 0.71, + "learning_rate": 4.340372835206598e-06, + "loss": 0.8838, + "step": 3411 + }, + { + "epoch": 0.71, + "learning_rate": 4.339492987731496e-06, + "loss": 0.7539, + "step": 3412 + }, + { + "epoch": 0.71, + "learning_rate": 4.338612996327967e-06, + "loss": 1.0874, + "step": 3413 + }, + { + "epoch": 0.71, + "learning_rate": 4.33773286109057e-06, + "loss": 0.8938, + "step": 3414 + }, + { + "epoch": 0.71, + "learning_rate": 4.33685258211387e-06, + "loss": 1.0346, + "step": 3415 + }, + { + "epoch": 0.71, + "learning_rate": 4.3359721594924565e-06, + "loss": 1.1296, + "step": 3416 + }, + { + "epoch": 0.71, + "learning_rate": 4.33509159332093e-06, + "loss": 1.029, + "step": 3417 + }, + { + "epoch": 0.71, + "learning_rate": 4.334210883693907e-06, + "loss": 0.9882, + "step": 3418 + }, + { + "epoch": 0.71, + "learning_rate": 4.333330030706019e-06, + "loss": 0.9229, + "step": 3419 + }, + { + "epoch": 0.71, + "learning_rate": 4.332449034451915e-06, + "loss": 1.1449, + "step": 3420 + }, + { + "epoch": 0.71, + "learning_rate": 4.331567895026257e-06, + "loss": 1.0202, + "step": 3421 + }, + { + "epoch": 0.71, + "learning_rate": 4.330686612523724e-06, + "loss": 1.0483, + "step": 3422 + }, + { + "epoch": 0.71, + "learning_rate": 4.329805187039009e-06, + "loss": 1.0702, + "step": 3423 + }, + { + "epoch": 0.71, + "learning_rate": 4.328923618666822e-06, + "loss": 0.891, + "step": 3424 + }, + { + "epoch": 0.71, + "learning_rate": 4.328041907501886e-06, + "loss": 0.8772, + "step": 3425 + }, + { + "epoch": 0.71, + "learning_rate": 4.327160053638944e-06, + "loss": 0.9633, + "step": 3426 + }, + { + "epoch": 0.71, + "learning_rate": 4.326278057172746e-06, + "loss": 1.0303, + "step": 3427 + }, + { + "epoch": 0.71, + "learning_rate": 4.3253959181980676e-06, + "loss": 0.9097, + "step": 3428 + }, + { + "epoch": 0.71, + "learning_rate": 4.324513636809691e-06, + "loss": 0.9014, + "step": 3429 + }, + { + "epoch": 0.71, + "learning_rate": 4.32363121310242e-06, + "loss": 0.992, + "step": 3430 + }, + { + "epoch": 0.71, + "learning_rate": 4.322748647171069e-06, + "loss": 1.0107, + "step": 3431 + }, + { + "epoch": 0.71, + "learning_rate": 4.32186593911047e-06, + "loss": 0.9996, + "step": 3432 + }, + { + "epoch": 0.71, + "learning_rate": 4.320983089015471e-06, + "loss": 0.9945, + "step": 3433 + }, + { + "epoch": 0.71, + "learning_rate": 4.320100096980932e-06, + "loss": 0.8982, + "step": 3434 + }, + { + "epoch": 0.71, + "learning_rate": 4.319216963101734e-06, + "loss": 0.9617, + "step": 3435 + }, + { + "epoch": 0.71, + "learning_rate": 4.318333687472765e-06, + "loss": 0.848, + "step": 3436 + }, + { + "epoch": 0.71, + "learning_rate": 4.317450270188937e-06, + "loss": 0.8862, + "step": 3437 + }, + { + "epoch": 0.72, + "learning_rate": 4.31656671134517e-06, + "loss": 0.8906, + "step": 3438 + }, + { + "epoch": 0.72, + "learning_rate": 4.315683011036405e-06, + "loss": 0.9274, + "step": 3439 + }, + { + "epoch": 0.72, + "learning_rate": 4.314799169357594e-06, + "loss": 0.8619, + "step": 3440 + }, + { + "epoch": 0.72, + "learning_rate": 4.313915186403706e-06, + "loss": 0.8059, + "step": 3441 + }, + { + "epoch": 0.72, + "learning_rate": 4.313031062269724e-06, + "loss": 0.9113, + "step": 3442 + }, + { + "epoch": 0.72, + "learning_rate": 4.31214679705065e-06, + "loss": 1.0043, + "step": 3443 + }, + { + "epoch": 0.72, + "learning_rate": 4.311262390841496e-06, + "loss": 0.9531, + "step": 3444 + }, + { + "epoch": 0.72, + "learning_rate": 4.31037784373729e-06, + "loss": 1.35, + "step": 3445 + }, + { + "epoch": 0.72, + "learning_rate": 4.30949315583308e-06, + "loss": 0.9556, + "step": 3446 + }, + { + "epoch": 0.72, + "learning_rate": 4.308608327223923e-06, + "loss": 1.0562, + "step": 3447 + }, + { + "epoch": 0.72, + "learning_rate": 4.307723358004895e-06, + "loss": 0.8761, + "step": 3448 + }, + { + "epoch": 0.72, + "learning_rate": 4.306838248271084e-06, + "loss": 0.7873, + "step": 3449 + }, + { + "epoch": 0.72, + "learning_rate": 4.305952998117598e-06, + "loss": 1.1943, + "step": 3450 + }, + { + "epoch": 0.72, + "learning_rate": 4.305067607639556e-06, + "loss": 0.9983, + "step": 3451 + }, + { + "epoch": 0.72, + "learning_rate": 4.304182076932091e-06, + "loss": 0.9429, + "step": 3452 + }, + { + "epoch": 0.72, + "learning_rate": 4.303296406090356e-06, + "loss": 0.9906, + "step": 3453 + }, + { + "epoch": 0.72, + "learning_rate": 4.302410595209515e-06, + "loss": 0.8486, + "step": 3454 + }, + { + "epoch": 0.72, + "learning_rate": 4.301524644384749e-06, + "loss": 0.8666, + "step": 3455 + }, + { + "epoch": 0.72, + "learning_rate": 4.300638553711251e-06, + "loss": 0.9506, + "step": 3456 + }, + { + "epoch": 0.72, + "learning_rate": 4.299752323284235e-06, + "loss": 1.065, + "step": 3457 + }, + { + "epoch": 0.72, + "learning_rate": 4.298865953198924e-06, + "loss": 0.8908, + "step": 3458 + }, + { + "epoch": 0.72, + "learning_rate": 4.29797944355056e-06, + "loss": 0.8107, + "step": 3459 + }, + { + "epoch": 0.72, + "learning_rate": 4.2970927944343964e-06, + "loss": 1.1452, + "step": 3460 + }, + { + "epoch": 0.72, + "learning_rate": 4.296206005945705e-06, + "loss": 1.0273, + "step": 3461 + }, + { + "epoch": 0.72, + "learning_rate": 4.295319078179771e-06, + "loss": 1.123, + "step": 3462 + }, + { + "epoch": 0.72, + "learning_rate": 4.294432011231893e-06, + "loss": 1.1995, + "step": 3463 + }, + { + "epoch": 0.72, + "learning_rate": 4.2935448051973894e-06, + "loss": 0.907, + "step": 3464 + }, + { + "epoch": 0.72, + "learning_rate": 4.2926574601715875e-06, + "loss": 1.0855, + "step": 3465 + }, + { + "epoch": 0.72, + "learning_rate": 4.291769976249835e-06, + "loss": 0.7324, + "step": 3466 + }, + { + "epoch": 0.72, + "learning_rate": 4.29088235352749e-06, + "loss": 0.7538, + "step": 3467 + }, + { + "epoch": 0.72, + "learning_rate": 4.289994592099928e-06, + "loss": 0.9504, + "step": 3468 + }, + { + "epoch": 0.72, + "learning_rate": 4.289106692062539e-06, + "loss": 1.101, + "step": 3469 + }, + { + "epoch": 0.72, + "learning_rate": 4.288218653510729e-06, + "loss": 0.8395, + "step": 3470 + }, + { + "epoch": 0.72, + "learning_rate": 4.287330476539916e-06, + "loss": 0.7937, + "step": 3471 + }, + { + "epoch": 0.72, + "learning_rate": 4.2864421612455355e-06, + "loss": 1.0348, + "step": 3472 + }, + { + "epoch": 0.72, + "learning_rate": 4.285553707723036e-06, + "loss": 0.8542, + "step": 3473 + }, + { + "epoch": 0.72, + "learning_rate": 4.284665116067883e-06, + "loss": 1.0183, + "step": 3474 + }, + { + "epoch": 0.72, + "learning_rate": 4.283776386375554e-06, + "loss": 0.7283, + "step": 3475 + }, + { + "epoch": 0.72, + "learning_rate": 4.282887518741544e-06, + "loss": 0.9496, + "step": 3476 + }, + { + "epoch": 0.72, + "learning_rate": 4.281998513261363e-06, + "loss": 0.9132, + "step": 3477 + }, + { + "epoch": 0.72, + "learning_rate": 4.2811093700305324e-06, + "loss": 0.7566, + "step": 3478 + }, + { + "epoch": 0.72, + "learning_rate": 4.280220089144591e-06, + "loss": 0.8266, + "step": 3479 + }, + { + "epoch": 0.72, + "learning_rate": 4.279330670699093e-06, + "loss": 0.8204, + "step": 3480 + }, + { + "epoch": 0.72, + "learning_rate": 4.278441114789604e-06, + "loss": 0.9413, + "step": 3481 + }, + { + "epoch": 0.72, + "learning_rate": 4.2775514215117114e-06, + "loss": 0.9314, + "step": 3482 + }, + { + "epoch": 0.72, + "learning_rate": 4.276661590961007e-06, + "loss": 0.9408, + "step": 3483 + }, + { + "epoch": 0.72, + "learning_rate": 4.2757716232331076e-06, + "loss": 0.8982, + "step": 3484 + }, + { + "epoch": 0.72, + "learning_rate": 4.274881518423637e-06, + "loss": 1.1474, + "step": 3485 + }, + { + "epoch": 0.73, + "learning_rate": 4.2739912766282394e-06, + "loss": 0.8295, + "step": 3486 + }, + { + "epoch": 0.73, + "learning_rate": 4.27310089794257e-06, + "loss": 0.8983, + "step": 3487 + }, + { + "epoch": 0.73, + "learning_rate": 4.272210382462299e-06, + "loss": 0.9493, + "step": 3488 + }, + { + "epoch": 0.73, + "learning_rate": 4.2713197302831134e-06, + "loss": 0.9503, + "step": 3489 + }, + { + "epoch": 0.73, + "learning_rate": 4.270428941500714e-06, + "loss": 0.7448, + "step": 3490 + }, + { + "epoch": 0.73, + "learning_rate": 4.269538016210815e-06, + "loss": 0.9328, + "step": 3491 + }, + { + "epoch": 0.73, + "learning_rate": 4.268646954509148e-06, + "loss": 0.8227, + "step": 3492 + }, + { + "epoch": 0.73, + "learning_rate": 4.2677557564914545e-06, + "loss": 1.1788, + "step": 3493 + }, + { + "epoch": 0.73, + "learning_rate": 4.266864422253496e-06, + "loss": 0.8258, + "step": 3494 + }, + { + "epoch": 0.73, + "learning_rate": 4.265972951891046e-06, + "loss": 1.0879, + "step": 3495 + }, + { + "epoch": 0.73, + "learning_rate": 4.265081345499893e-06, + "loss": 0.8698, + "step": 3496 + }, + { + "epoch": 0.73, + "learning_rate": 4.264189603175839e-06, + "loss": 0.9619, + "step": 3497 + }, + { + "epoch": 0.73, + "learning_rate": 4.263297725014702e-06, + "loss": 0.7746, + "step": 3498 + }, + { + "epoch": 0.73, + "learning_rate": 4.262405711112316e-06, + "loss": 1.0532, + "step": 3499 + }, + { + "epoch": 0.73, + "learning_rate": 4.261513561564526e-06, + "loss": 0.9368, + "step": 3500 + }, + { + "epoch": 0.73, + "learning_rate": 4.260621276467193e-06, + "loss": 0.7647, + "step": 3501 + }, + { + "epoch": 0.73, + "learning_rate": 4.259728855916194e-06, + "loss": 0.9242, + "step": 3502 + }, + { + "epoch": 0.73, + "learning_rate": 4.25883630000742e-06, + "loss": 0.9647, + "step": 3503 + }, + { + "epoch": 0.73, + "learning_rate": 4.2579436088367755e-06, + "loss": 0.8475, + "step": 3504 + }, + { + "epoch": 0.73, + "learning_rate": 4.257050782500179e-06, + "loss": 0.758, + "step": 3505 + }, + { + "epoch": 0.73, + "learning_rate": 4.256157821093566e-06, + "loss": 0.7758, + "step": 3506 + }, + { + "epoch": 0.73, + "learning_rate": 4.255264724712884e-06, + "loss": 0.7878, + "step": 3507 + }, + { + "epoch": 0.73, + "learning_rate": 4.254371493454099e-06, + "loss": 0.9981, + "step": 3508 + }, + { + "epoch": 0.73, + "learning_rate": 4.253478127413185e-06, + "loss": 0.9878, + "step": 3509 + }, + { + "epoch": 0.73, + "learning_rate": 4.252584626686136e-06, + "loss": 1.1134, + "step": 3510 + }, + { + "epoch": 0.73, + "learning_rate": 4.251690991368958e-06, + "loss": 0.8319, + "step": 3511 + }, + { + "epoch": 0.73, + "learning_rate": 4.250797221557673e-06, + "loss": 0.8534, + "step": 3512 + }, + { + "epoch": 0.73, + "learning_rate": 4.249903317348315e-06, + "loss": 0.9037, + "step": 3513 + }, + { + "epoch": 0.73, + "learning_rate": 4.249009278836935e-06, + "loss": 0.8691, + "step": 3514 + }, + { + "epoch": 0.73, + "learning_rate": 4.248115106119597e-06, + "loss": 0.9276, + "step": 3515 + }, + { + "epoch": 0.73, + "learning_rate": 4.24722079929238e-06, + "loss": 1.0728, + "step": 3516 + }, + { + "epoch": 0.73, + "learning_rate": 4.246326358451377e-06, + "loss": 1.0653, + "step": 3517 + }, + { + "epoch": 0.73, + "learning_rate": 4.245431783692697e-06, + "loss": 0.9523, + "step": 3518 + }, + { + "epoch": 0.73, + "learning_rate": 4.244537075112459e-06, + "loss": 0.7537, + "step": 3519 + }, + { + "epoch": 0.73, + "learning_rate": 4.2436422328068025e-06, + "loss": 0.8829, + "step": 3520 + }, + { + "epoch": 0.73, + "learning_rate": 4.242747256871875e-06, + "loss": 1.1453, + "step": 3521 + }, + { + "epoch": 0.73, + "learning_rate": 4.241852147403845e-06, + "loss": 0.8894, + "step": 3522 + }, + { + "epoch": 0.73, + "learning_rate": 4.24095690449889e-06, + "loss": 0.8774, + "step": 3523 + }, + { + "epoch": 0.73, + "learning_rate": 4.240061528253204e-06, + "loss": 1.1253, + "step": 3524 + }, + { + "epoch": 0.73, + "learning_rate": 4.239166018762995e-06, + "loss": 1.0195, + "step": 3525 + }, + { + "epoch": 0.73, + "learning_rate": 4.2382703761244866e-06, + "loss": 1.0923, + "step": 3526 + }, + { + "epoch": 0.73, + "learning_rate": 4.2373746004339134e-06, + "loss": 1.292, + "step": 3527 + }, + { + "epoch": 0.73, + "learning_rate": 4.236478691787529e-06, + "loss": 0.9297, + "step": 3528 + }, + { + "epoch": 0.73, + "learning_rate": 4.235582650281596e-06, + "loss": 0.9208, + "step": 3529 + }, + { + "epoch": 0.73, + "learning_rate": 4.234686476012396e-06, + "loss": 0.8704, + "step": 3530 + }, + { + "epoch": 0.73, + "learning_rate": 4.233790169076222e-06, + "loss": 0.7931, + "step": 3531 + }, + { + "epoch": 0.73, + "learning_rate": 4.232893729569383e-06, + "loss": 0.9236, + "step": 3532 + }, + { + "epoch": 0.73, + "learning_rate": 4.231997157588201e-06, + "loss": 1.1514, + "step": 3533 + }, + { + "epoch": 0.74, + "learning_rate": 4.231100453229012e-06, + "loss": 0.9594, + "step": 3534 + }, + { + "epoch": 0.74, + "learning_rate": 4.230203616588167e-06, + "loss": 0.8509, + "step": 3535 + }, + { + "epoch": 0.74, + "learning_rate": 4.229306647762031e-06, + "loss": 0.8754, + "step": 3536 + }, + { + "epoch": 0.74, + "learning_rate": 4.228409546846983e-06, + "loss": 1.0972, + "step": 3537 + }, + { + "epoch": 0.74, + "learning_rate": 4.227512313939418e-06, + "loss": 1.0483, + "step": 3538 + }, + { + "epoch": 0.74, + "learning_rate": 4.2266149491357415e-06, + "loss": 0.9213, + "step": 3539 + }, + { + "epoch": 0.74, + "learning_rate": 4.2257174525323766e-06, + "loss": 0.894, + "step": 3540 + }, + { + "epoch": 0.74, + "learning_rate": 4.224819824225759e-06, + "loss": 1.017, + "step": 3541 + }, + { + "epoch": 0.74, + "learning_rate": 4.223922064312339e-06, + "loss": 0.9297, + "step": 3542 + }, + { + "epoch": 0.74, + "learning_rate": 4.22302417288858e-06, + "loss": 0.799, + "step": 3543 + }, + { + "epoch": 0.74, + "learning_rate": 4.222126150050962e-06, + "loss": 1.1207, + "step": 3544 + }, + { + "epoch": 0.74, + "learning_rate": 4.221227995895974e-06, + "loss": 0.7632, + "step": 3545 + }, + { + "epoch": 0.74, + "learning_rate": 4.220329710520127e-06, + "loss": 0.853, + "step": 3546 + }, + { + "epoch": 0.74, + "learning_rate": 4.219431294019938e-06, + "loss": 0.847, + "step": 3547 + }, + { + "epoch": 0.74, + "learning_rate": 4.218532746491946e-06, + "loss": 0.9976, + "step": 3548 + }, + { + "epoch": 0.74, + "learning_rate": 4.217634068032696e-06, + "loss": 1.0239, + "step": 3549 + }, + { + "epoch": 0.74, + "learning_rate": 4.216735258738751e-06, + "loss": 0.8153, + "step": 3550 + }, + { + "epoch": 0.74, + "learning_rate": 4.215836318706691e-06, + "loss": 0.9319, + "step": 3551 + }, + { + "epoch": 0.74, + "learning_rate": 4.214937248033105e-06, + "loss": 0.916, + "step": 3552 + }, + { + "epoch": 0.74, + "learning_rate": 4.2140380468145975e-06, + "loss": 0.9854, + "step": 3553 + }, + { + "epoch": 0.74, + "learning_rate": 4.21313871514779e-06, + "loss": 1.111, + "step": 3554 + }, + { + "epoch": 0.74, + "learning_rate": 4.212239253129313e-06, + "loss": 0.8464, + "step": 3555 + }, + { + "epoch": 0.74, + "learning_rate": 4.211339660855815e-06, + "loss": 0.8019, + "step": 3556 + }, + { + "epoch": 0.74, + "learning_rate": 4.210439938423956e-06, + "loss": 1.1104, + "step": 3557 + }, + { + "epoch": 0.74, + "learning_rate": 4.209540085930414e-06, + "loss": 1.0256, + "step": 3558 + }, + { + "epoch": 0.74, + "learning_rate": 4.2086401034718746e-06, + "loss": 0.9971, + "step": 3559 + }, + { + "epoch": 0.74, + "learning_rate": 4.207739991145043e-06, + "loss": 1.137, + "step": 3560 + }, + { + "epoch": 0.74, + "learning_rate": 4.206839749046635e-06, + "loss": 0.9002, + "step": 3561 + }, + { + "epoch": 0.74, + "learning_rate": 4.205939377273382e-06, + "loss": 0.798, + "step": 3562 + }, + { + "epoch": 0.74, + "learning_rate": 4.205038875922029e-06, + "loss": 0.8058, + "step": 3563 + }, + { + "epoch": 0.74, + "learning_rate": 4.2041382450893356e-06, + "loss": 1.1301, + "step": 3564 + }, + { + "epoch": 0.74, + "learning_rate": 4.203237484872073e-06, + "loss": 0.7505, + "step": 3565 + }, + { + "epoch": 0.74, + "learning_rate": 4.202336595367029e-06, + "loss": 0.7886, + "step": 3566 + }, + { + "epoch": 0.74, + "learning_rate": 4.201435576671004e-06, + "loss": 1.0576, + "step": 3567 + }, + { + "epoch": 0.74, + "learning_rate": 4.200534428880811e-06, + "loss": 0.8532, + "step": 3568 + }, + { + "epoch": 0.74, + "learning_rate": 4.199633152093279e-06, + "loss": 0.9086, + "step": 3569 + }, + { + "epoch": 0.74, + "learning_rate": 4.198731746405251e-06, + "loss": 1.0006, + "step": 3570 + }, + { + "epoch": 0.74, + "learning_rate": 4.197830211913583e-06, + "loss": 1.075, + "step": 3571 + }, + { + "epoch": 0.74, + "learning_rate": 4.1969285487151425e-06, + "loss": 0.8186, + "step": 3572 + }, + { + "epoch": 0.74, + "learning_rate": 4.196026756906817e-06, + "loss": 1.0068, + "step": 3573 + }, + { + "epoch": 0.74, + "learning_rate": 4.1951248365855e-06, + "loss": 0.8313, + "step": 3574 + }, + { + "epoch": 0.74, + "learning_rate": 4.194222787848106e-06, + "loss": 1.0249, + "step": 3575 + }, + { + "epoch": 0.74, + "learning_rate": 4.1933206107915575e-06, + "loss": 0.9919, + "step": 3576 + }, + { + "epoch": 0.74, + "learning_rate": 4.192418305512796e-06, + "loss": 1.1974, + "step": 3577 + }, + { + "epoch": 0.74, + "learning_rate": 4.1915158721087705e-06, + "loss": 0.9332, + "step": 3578 + }, + { + "epoch": 0.74, + "learning_rate": 4.190613310676451e-06, + "loss": 0.8314, + "step": 3579 + }, + { + "epoch": 0.74, + "learning_rate": 4.189710621312816e-06, + "loss": 0.9587, + "step": 3580 + }, + { + "epoch": 0.74, + "learning_rate": 4.18880780411486e-06, + "loss": 0.8235, + "step": 3581 + }, + { + "epoch": 0.75, + "learning_rate": 4.18790485917959e-06, + "loss": 1.1722, + "step": 3582 + }, + { + "epoch": 0.75, + "learning_rate": 4.187001786604028e-06, + "loss": 1.0222, + "step": 3583 + }, + { + "epoch": 0.75, + "learning_rate": 4.186098586485208e-06, + "loss": 0.7984, + "step": 3584 + }, + { + "epoch": 0.75, + "learning_rate": 4.18519525892018e-06, + "loss": 1.0661, + "step": 3585 + }, + { + "epoch": 0.75, + "learning_rate": 4.184291804006006e-06, + "loss": 1.0798, + "step": 3586 + }, + { + "epoch": 0.75, + "learning_rate": 4.183388221839762e-06, + "loss": 1.1587, + "step": 3587 + }, + { + "epoch": 0.75, + "learning_rate": 4.182484512518537e-06, + "loss": 0.9398, + "step": 3588 + }, + { + "epoch": 0.75, + "learning_rate": 4.181580676139437e-06, + "loss": 0.7604, + "step": 3589 + }, + { + "epoch": 0.75, + "learning_rate": 4.180676712799577e-06, + "loss": 0.9776, + "step": 3590 + }, + { + "epoch": 0.75, + "learning_rate": 4.179772622596089e-06, + "loss": 0.8762, + "step": 3591 + }, + { + "epoch": 0.75, + "learning_rate": 4.178868405626116e-06, + "loss": 0.9815, + "step": 3592 + }, + { + "epoch": 0.75, + "learning_rate": 4.177964061986818e-06, + "loss": 0.9589, + "step": 3593 + }, + { + "epoch": 0.75, + "learning_rate": 4.1770595917753645e-06, + "loss": 0.8085, + "step": 3594 + }, + { + "epoch": 0.75, + "learning_rate": 4.176154995088943e-06, + "loss": 1.0293, + "step": 3595 + }, + { + "epoch": 0.75, + "learning_rate": 4.17525027202475e-06, + "loss": 0.8238, + "step": 3596 + }, + { + "epoch": 0.75, + "learning_rate": 4.17434542268e-06, + "loss": 0.8501, + "step": 3597 + }, + { + "epoch": 0.75, + "learning_rate": 4.173440447151918e-06, + "loss": 0.8039, + "step": 3598 + }, + { + "epoch": 0.75, + "learning_rate": 4.172535345537744e-06, + "loss": 0.8847, + "step": 3599 + }, + { + "epoch": 0.75, + "learning_rate": 4.17163011793473e-06, + "loss": 0.8696, + "step": 3600 + }, + { + "epoch": 0.75, + "learning_rate": 4.170724764440144e-06, + "loss": 0.9525, + "step": 3601 + }, + { + "epoch": 0.75, + "learning_rate": 4.1698192851512655e-06, + "loss": 1.0049, + "step": 3602 + }, + { + "epoch": 0.75, + "learning_rate": 4.168913680165389e-06, + "loss": 0.9802, + "step": 3603 + }, + { + "epoch": 0.75, + "learning_rate": 4.168007949579819e-06, + "loss": 0.8999, + "step": 3604 + }, + { + "epoch": 0.75, + "learning_rate": 4.16710209349188e-06, + "loss": 0.9112, + "step": 3605 + }, + { + "epoch": 0.75, + "learning_rate": 4.166196111998903e-06, + "loss": 1.0111, + "step": 3606 + }, + { + "epoch": 0.75, + "learning_rate": 4.165290005198238e-06, + "loss": 0.7999, + "step": 3607 + }, + { + "epoch": 0.75, + "learning_rate": 4.164383773187245e-06, + "loss": 1.0389, + "step": 3608 + }, + { + "epoch": 0.75, + "learning_rate": 4.163477416063298e-06, + "loss": 0.9433, + "step": 3609 + }, + { + "epoch": 0.75, + "learning_rate": 4.162570933923786e-06, + "loss": 1.09, + "step": 3610 + }, + { + "epoch": 0.75, + "learning_rate": 4.1616643268661105e-06, + "loss": 0.83, + "step": 3611 + }, + { + "epoch": 0.75, + "learning_rate": 4.1607575949876855e-06, + "loss": 0.8015, + "step": 3612 + }, + { + "epoch": 0.75, + "learning_rate": 4.15985073838594e-06, + "loss": 1.2488, + "step": 3613 + }, + { + "epoch": 0.75, + "learning_rate": 4.158943757158315e-06, + "loss": 0.963, + "step": 3614 + }, + { + "epoch": 0.75, + "learning_rate": 4.158036651402267e-06, + "loss": 0.7842, + "step": 3615 + }, + { + "epoch": 0.75, + "learning_rate": 4.157129421215263e-06, + "loss": 0.9218, + "step": 3616 + }, + { + "epoch": 0.75, + "learning_rate": 4.156222066694785e-06, + "loss": 1.0587, + "step": 3617 + }, + { + "epoch": 0.75, + "learning_rate": 4.155314587938328e-06, + "loss": 1.1454, + "step": 3618 + }, + { + "epoch": 0.75, + "learning_rate": 4.154406985043402e-06, + "loss": 0.9877, + "step": 3619 + }, + { + "epoch": 0.75, + "learning_rate": 4.153499258107527e-06, + "loss": 1.1088, + "step": 3620 + }, + { + "epoch": 0.75, + "learning_rate": 4.1525914072282395e-06, + "loss": 0.8986, + "step": 3621 + }, + { + "epoch": 0.75, + "learning_rate": 4.151683432503087e-06, + "loss": 0.9411, + "step": 3622 + }, + { + "epoch": 0.75, + "learning_rate": 4.150775334029632e-06, + "loss": 1.0465, + "step": 3623 + }, + { + "epoch": 0.75, + "learning_rate": 4.14986711190545e-06, + "loss": 0.9748, + "step": 3624 + }, + { + "epoch": 0.75, + "learning_rate": 4.148958766228128e-06, + "loss": 0.8212, + "step": 3625 + }, + { + "epoch": 0.75, + "learning_rate": 4.14805029709527e-06, + "loss": 0.8207, + "step": 3626 + }, + { + "epoch": 0.75, + "learning_rate": 4.147141704604488e-06, + "loss": 1.0977, + "step": 3627 + }, + { + "epoch": 0.75, + "learning_rate": 4.146232988853413e-06, + "loss": 0.8563, + "step": 3628 + }, + { + "epoch": 0.75, + "learning_rate": 4.145324149939684e-06, + "loss": 0.9618, + "step": 3629 + }, + { + "epoch": 0.75, + "learning_rate": 4.144415187960958e-06, + "loss": 0.9256, + "step": 3630 + }, + { + "epoch": 0.76, + "learning_rate": 4.1435061030149e-06, + "loss": 1.0686, + "step": 3631 + }, + { + "epoch": 0.76, + "learning_rate": 4.1425968951991936e-06, + "loss": 0.9414, + "step": 3632 + }, + { + "epoch": 0.76, + "learning_rate": 4.141687564611532e-06, + "loss": 1.1524, + "step": 3633 + }, + { + "epoch": 0.76, + "learning_rate": 4.140778111349625e-06, + "loss": 0.8236, + "step": 3634 + }, + { + "epoch": 0.76, + "learning_rate": 4.139868535511189e-06, + "loss": 0.8404, + "step": 3635 + }, + { + "epoch": 0.76, + "learning_rate": 4.138958837193961e-06, + "loss": 0.9354, + "step": 3636 + }, + { + "epoch": 0.76, + "learning_rate": 4.138049016495688e-06, + "loss": 1.031, + "step": 3637 + }, + { + "epoch": 0.76, + "learning_rate": 4.137139073514129e-06, + "loss": 1.1, + "step": 3638 + }, + { + "epoch": 0.76, + "learning_rate": 4.136229008347057e-06, + "loss": 0.9411, + "step": 3639 + }, + { + "epoch": 0.76, + "learning_rate": 4.13531882109226e-06, + "loss": 0.7959, + "step": 3640 + }, + { + "epoch": 0.76, + "learning_rate": 4.134408511847536e-06, + "loss": 1.122, + "step": 3641 + }, + { + "epoch": 0.76, + "learning_rate": 4.133498080710699e-06, + "loss": 0.9287, + "step": 3642 + }, + { + "epoch": 0.76, + "learning_rate": 4.132587527779574e-06, + "loss": 1.1266, + "step": 3643 + }, + { + "epoch": 0.76, + "learning_rate": 4.131676853151999e-06, + "loss": 0.9217, + "step": 3644 + }, + { + "epoch": 0.76, + "learning_rate": 4.130766056925827e-06, + "loss": 0.881, + "step": 3645 + }, + { + "epoch": 0.76, + "learning_rate": 4.129855139198924e-06, + "loss": 1.043, + "step": 3646 + }, + { + "epoch": 0.76, + "learning_rate": 4.128944100069166e-06, + "loss": 0.8369, + "step": 3647 + }, + { + "epoch": 0.76, + "learning_rate": 4.128032939634447e-06, + "loss": 0.9613, + "step": 3648 + }, + { + "epoch": 0.76, + "learning_rate": 4.127121657992667e-06, + "loss": 1.081, + "step": 3649 + }, + { + "epoch": 0.76, + "learning_rate": 4.126210255241745e-06, + "loss": 0.9826, + "step": 3650 + }, + { + "epoch": 0.76, + "learning_rate": 4.125298731479613e-06, + "loss": 0.9601, + "step": 3651 + }, + { + "epoch": 0.76, + "learning_rate": 4.124387086804212e-06, + "loss": 1.0152, + "step": 3652 + }, + { + "epoch": 0.76, + "learning_rate": 4.123475321313498e-06, + "loss": 0.8308, + "step": 3653 + }, + { + "epoch": 0.76, + "learning_rate": 4.122563435105442e-06, + "loss": 0.9653, + "step": 3654 + }, + { + "epoch": 0.76, + "learning_rate": 4.121651428278024e-06, + "loss": 0.8847, + "step": 3655 + }, + { + "epoch": 0.76, + "learning_rate": 4.120739300929241e-06, + "loss": 0.8779, + "step": 3656 + }, + { + "epoch": 0.76, + "learning_rate": 4.119827053157099e-06, + "loss": 0.7514, + "step": 3657 + }, + { + "epoch": 0.76, + "learning_rate": 4.118914685059621e-06, + "loss": 0.9541, + "step": 3658 + }, + { + "epoch": 0.76, + "learning_rate": 4.118002196734839e-06, + "loss": 0.8419, + "step": 3659 + }, + { + "epoch": 0.76, + "learning_rate": 4.117089588280802e-06, + "loss": 0.9234, + "step": 3660 + }, + { + "epoch": 0.76, + "learning_rate": 4.116176859795568e-06, + "loss": 0.9204, + "step": 3661 + }, + { + "epoch": 0.76, + "learning_rate": 4.11526401137721e-06, + "loss": 1.042, + "step": 3662 + }, + { + "epoch": 0.76, + "learning_rate": 4.114351043123813e-06, + "loss": 1.0253, + "step": 3663 + }, + { + "epoch": 0.76, + "learning_rate": 4.113437955133477e-06, + "loss": 1.1657, + "step": 3664 + }, + { + "epoch": 0.76, + "learning_rate": 4.112524747504311e-06, + "loss": 0.8177, + "step": 3665 + }, + { + "epoch": 0.76, + "learning_rate": 4.111611420334441e-06, + "loss": 0.8944, + "step": 3666 + }, + { + "epoch": 0.76, + "learning_rate": 4.110697973722003e-06, + "loss": 1.0394, + "step": 3667 + }, + { + "epoch": 0.76, + "learning_rate": 4.109784407765147e-06, + "loss": 1.0076, + "step": 3668 + }, + { + "epoch": 0.76, + "learning_rate": 4.108870722562035e-06, + "loss": 1.4124, + "step": 3669 + }, + { + "epoch": 0.76, + "learning_rate": 4.107956918210844e-06, + "loss": 1.0489, + "step": 3670 + }, + { + "epoch": 0.76, + "learning_rate": 4.107042994809761e-06, + "loss": 0.8722, + "step": 3671 + }, + { + "epoch": 0.76, + "learning_rate": 4.1061289524569865e-06, + "loss": 0.9246, + "step": 3672 + }, + { + "epoch": 0.76, + "learning_rate": 4.105214791250736e-06, + "loss": 1.1704, + "step": 3673 + }, + { + "epoch": 0.76, + "learning_rate": 4.104300511289234e-06, + "loss": 1.0774, + "step": 3674 + }, + { + "epoch": 0.76, + "learning_rate": 4.103386112670721e-06, + "loss": 0.7417, + "step": 3675 + }, + { + "epoch": 0.76, + "learning_rate": 4.1024715954934506e-06, + "loss": 0.8253, + "step": 3676 + }, + { + "epoch": 0.76, + "learning_rate": 4.101556959855684e-06, + "loss": 0.8101, + "step": 3677 + }, + { + "epoch": 0.76, + "learning_rate": 4.100642205855702e-06, + "loss": 0.7658, + "step": 3678 + }, + { + "epoch": 0.77, + "learning_rate": 4.099727333591792e-06, + "loss": 0.9541, + "step": 3679 + }, + { + "epoch": 0.77, + "learning_rate": 4.09881234316226e-06, + "loss": 0.9408, + "step": 3680 + }, + { + "epoch": 0.77, + "learning_rate": 4.097897234665419e-06, + "loss": 0.9176, + "step": 3681 + }, + { + "epoch": 0.77, + "learning_rate": 4.096982008199599e-06, + "loss": 0.9748, + "step": 3682 + }, + { + "epoch": 0.77, + "learning_rate": 4.0960666638631405e-06, + "loss": 0.919, + "step": 3683 + }, + { + "epoch": 0.77, + "learning_rate": 4.095151201754397e-06, + "loss": 0.8735, + "step": 3684 + }, + { + "epoch": 0.77, + "learning_rate": 4.094235621971735e-06, + "loss": 1.0619, + "step": 3685 + }, + { + "epoch": 0.77, + "learning_rate": 4.093319924613534e-06, + "loss": 0.9872, + "step": 3686 + }, + { + "epoch": 0.77, + "learning_rate": 4.092404109778184e-06, + "loss": 1.1064, + "step": 3687 + }, + { + "epoch": 0.77, + "learning_rate": 4.091488177564091e-06, + "loss": 0.9663, + "step": 3688 + }, + { + "epoch": 0.77, + "learning_rate": 4.090572128069672e-06, + "loss": 0.8888, + "step": 3689 + }, + { + "epoch": 0.77, + "learning_rate": 4.089655961393355e-06, + "loss": 0.9134, + "step": 3690 + }, + { + "epoch": 0.77, + "learning_rate": 4.0887396776335825e-06, + "loss": 1.0994, + "step": 3691 + }, + { + "epoch": 0.77, + "learning_rate": 4.08782327688881e-06, + "loss": 0.8769, + "step": 3692 + }, + { + "epoch": 0.77, + "learning_rate": 4.086906759257503e-06, + "loss": 0.8805, + "step": 3693 + }, + { + "epoch": 0.77, + "learning_rate": 4.085990124838143e-06, + "loss": 0.9009, + "step": 3694 + }, + { + "epoch": 0.77, + "learning_rate": 4.085073373729221e-06, + "loss": 0.9048, + "step": 3695 + }, + { + "epoch": 0.77, + "learning_rate": 4.084156506029243e-06, + "loss": 1.0515, + "step": 3696 + }, + { + "epoch": 0.77, + "learning_rate": 4.083239521836726e-06, + "loss": 0.9042, + "step": 3697 + }, + { + "epoch": 0.77, + "learning_rate": 4.082322421250199e-06, + "loss": 1.1959, + "step": 3698 + }, + { + "epoch": 0.77, + "learning_rate": 4.0814052043682055e-06, + "loss": 0.8233, + "step": 3699 + }, + { + "epoch": 0.77, + "learning_rate": 4.080487871289299e-06, + "loss": 0.9878, + "step": 3700 + }, + { + "epoch": 0.77, + "learning_rate": 4.079570422112049e-06, + "loss": 1.077, + "step": 3701 + }, + { + "epoch": 0.77, + "learning_rate": 4.078652856935034e-06, + "loss": 0.776, + "step": 3702 + }, + { + "epoch": 0.77, + "learning_rate": 4.077735175856847e-06, + "loss": 0.85, + "step": 3703 + }, + { + "epoch": 0.77, + "learning_rate": 4.076817378976091e-06, + "loss": 0.8337, + "step": 3704 + }, + { + "epoch": 0.77, + "learning_rate": 4.075899466391385e-06, + "loss": 0.8942, + "step": 3705 + }, + { + "epoch": 0.77, + "learning_rate": 4.074981438201358e-06, + "loss": 0.9372, + "step": 3706 + }, + { + "epoch": 0.77, + "learning_rate": 4.074063294504653e-06, + "loss": 1.0944, + "step": 3707 + }, + { + "epoch": 0.77, + "learning_rate": 4.073145035399922e-06, + "loss": 0.9716, + "step": 3708 + }, + { + "epoch": 0.77, + "learning_rate": 4.072226660985835e-06, + "loss": 0.9435, + "step": 3709 + }, + { + "epoch": 0.77, + "learning_rate": 4.071308171361069e-06, + "loss": 0.9947, + "step": 3710 + }, + { + "epoch": 0.77, + "learning_rate": 4.070389566624317e-06, + "loss": 1.0681, + "step": 3711 + }, + { + "epoch": 0.77, + "learning_rate": 4.069470846874282e-06, + "loss": 0.8998, + "step": 3712 + }, + { + "epoch": 0.77, + "learning_rate": 4.068552012209682e-06, + "loss": 0.8082, + "step": 3713 + }, + { + "epoch": 0.77, + "learning_rate": 4.0676330627292425e-06, + "loss": 0.7926, + "step": 3714 + }, + { + "epoch": 0.77, + "learning_rate": 4.066713998531708e-06, + "loss": 0.8379, + "step": 3715 + }, + { + "epoch": 0.77, + "learning_rate": 4.065794819715829e-06, + "loss": 0.8172, + "step": 3716 + }, + { + "epoch": 0.77, + "learning_rate": 4.064875526380376e-06, + "loss": 0.8494, + "step": 3717 + }, + { + "epoch": 0.77, + "learning_rate": 4.06395611862412e-06, + "loss": 0.9464, + "step": 3718 + }, + { + "epoch": 0.77, + "learning_rate": 4.063036596545857e-06, + "loss": 0.8772, + "step": 3719 + }, + { + "epoch": 0.77, + "learning_rate": 4.062116960244387e-06, + "loss": 1.1453, + "step": 3720 + }, + { + "epoch": 0.77, + "learning_rate": 4.061197209818525e-06, + "loss": 0.9824, + "step": 3721 + }, + { + "epoch": 0.77, + "learning_rate": 4.060277345367099e-06, + "loss": 0.7891, + "step": 3722 + }, + { + "epoch": 0.77, + "learning_rate": 4.0593573669889476e-06, + "loss": 1.0241, + "step": 3723 + }, + { + "epoch": 0.77, + "learning_rate": 4.058437274782923e-06, + "loss": 0.9529, + "step": 3724 + }, + { + "epoch": 0.77, + "learning_rate": 4.057517068847889e-06, + "loss": 0.7969, + "step": 3725 + }, + { + "epoch": 0.77, + "learning_rate": 4.056596749282721e-06, + "loss": 0.9884, + "step": 3726 + }, + { + "epoch": 0.78, + "learning_rate": 4.0556763161863075e-06, + "loss": 0.9444, + "step": 3727 + }, + { + "epoch": 0.78, + "learning_rate": 4.05475576965755e-06, + "loss": 0.8787, + "step": 3728 + }, + { + "epoch": 0.78, + "learning_rate": 4.05383510979536e-06, + "loss": 0.959, + "step": 3729 + }, + { + "epoch": 0.78, + "learning_rate": 4.052914336698662e-06, + "loss": 0.8991, + "step": 3730 + }, + { + "epoch": 0.78, + "learning_rate": 4.051993450466394e-06, + "loss": 0.9443, + "step": 3731 + }, + { + "epoch": 0.78, + "learning_rate": 4.0510724511975045e-06, + "loss": 0.962, + "step": 3732 + }, + { + "epoch": 0.78, + "learning_rate": 4.050151338990956e-06, + "loss": 0.8835, + "step": 3733 + }, + { + "epoch": 0.78, + "learning_rate": 4.04923011394572e-06, + "loss": 0.8811, + "step": 3734 + }, + { + "epoch": 0.78, + "learning_rate": 4.048308776160785e-06, + "loss": 0.851, + "step": 3735 + }, + { + "epoch": 0.78, + "learning_rate": 4.047387325735144e-06, + "loss": 1.0219, + "step": 3736 + }, + { + "epoch": 0.78, + "learning_rate": 4.046465762767812e-06, + "loss": 0.8954, + "step": 3737 + }, + { + "epoch": 0.78, + "learning_rate": 4.045544087357807e-06, + "loss": 0.8965, + "step": 3738 + }, + { + "epoch": 0.78, + "learning_rate": 4.044622299604166e-06, + "loss": 1.2523, + "step": 3739 + }, + { + "epoch": 0.78, + "learning_rate": 4.043700399605931e-06, + "loss": 0.9719, + "step": 3740 + }, + { + "epoch": 0.78, + "learning_rate": 4.042778387462165e-06, + "loss": 0.876, + "step": 3741 + }, + { + "epoch": 0.78, + "learning_rate": 4.041856263271934e-06, + "loss": 0.8843, + "step": 3742 + }, + { + "epoch": 0.78, + "learning_rate": 4.040934027134323e-06, + "loss": 0.8996, + "step": 3743 + }, + { + "epoch": 0.78, + "learning_rate": 4.040011679148425e-06, + "loss": 0.9244, + "step": 3744 + }, + { + "epoch": 0.78, + "learning_rate": 4.039089219413345e-06, + "loss": 1.0528, + "step": 3745 + }, + { + "epoch": 0.78, + "learning_rate": 4.038166648028204e-06, + "loss": 1.1299, + "step": 3746 + }, + { + "epoch": 0.78, + "learning_rate": 4.03724396509213e-06, + "loss": 0.9025, + "step": 3747 + }, + { + "epoch": 0.78, + "learning_rate": 4.036321170704266e-06, + "loss": 0.94, + "step": 3748 + }, + { + "epoch": 0.78, + "learning_rate": 4.035398264963767e-06, + "loss": 1.0245, + "step": 3749 + }, + { + "epoch": 0.78, + "learning_rate": 4.034475247969797e-06, + "loss": 0.8154, + "step": 3750 + }, + { + "epoch": 0.78, + "learning_rate": 4.033552119821536e-06, + "loss": 0.7817, + "step": 3751 + }, + { + "epoch": 0.78, + "learning_rate": 4.032628880618173e-06, + "loss": 0.9285, + "step": 3752 + }, + { + "epoch": 0.78, + "learning_rate": 4.031705530458911e-06, + "loss": 0.8617, + "step": 3753 + }, + { + "epoch": 0.78, + "learning_rate": 4.030782069442963e-06, + "loss": 0.9429, + "step": 3754 + }, + { + "epoch": 0.78, + "learning_rate": 4.0298584976695565e-06, + "loss": 0.9416, + "step": 3755 + }, + { + "epoch": 0.78, + "learning_rate": 4.028934815237927e-06, + "loss": 1.0098, + "step": 3756 + }, + { + "epoch": 0.78, + "learning_rate": 4.028011022247324e-06, + "loss": 0.8809, + "step": 3757 + }, + { + "epoch": 0.78, + "learning_rate": 4.0270871187970114e-06, + "loss": 0.9568, + "step": 3758 + }, + { + "epoch": 0.78, + "learning_rate": 4.026163104986261e-06, + "loss": 0.9239, + "step": 3759 + }, + { + "epoch": 0.78, + "learning_rate": 4.025238980914357e-06, + "loss": 0.882, + "step": 3760 + }, + { + "epoch": 0.78, + "learning_rate": 4.024314746680599e-06, + "loss": 0.965, + "step": 3761 + }, + { + "epoch": 0.78, + "learning_rate": 4.023390402384295e-06, + "loss": 0.9596, + "step": 3762 + }, + { + "epoch": 0.78, + "learning_rate": 4.022465948124764e-06, + "loss": 0.8378, + "step": 3763 + }, + { + "epoch": 0.78, + "learning_rate": 4.02154138400134e-06, + "loss": 1.1284, + "step": 3764 + }, + { + "epoch": 0.78, + "learning_rate": 4.020616710113368e-06, + "loss": 0.9128, + "step": 3765 + }, + { + "epoch": 0.78, + "learning_rate": 4.0196919265602026e-06, + "loss": 1.0149, + "step": 3766 + }, + { + "epoch": 0.78, + "learning_rate": 4.018767033441213e-06, + "loss": 1.0, + "step": 3767 + }, + { + "epoch": 0.78, + "learning_rate": 4.017842030855778e-06, + "loss": 0.958, + "step": 3768 + }, + { + "epoch": 0.78, + "learning_rate": 4.016916918903289e-06, + "loss": 0.9632, + "step": 3769 + }, + { + "epoch": 0.78, + "learning_rate": 4.01599169768315e-06, + "loss": 1.1191, + "step": 3770 + }, + { + "epoch": 0.78, + "learning_rate": 4.015066367294776e-06, + "loss": 1.1033, + "step": 3771 + }, + { + "epoch": 0.78, + "learning_rate": 4.014140927837593e-06, + "loss": 0.8101, + "step": 3772 + }, + { + "epoch": 0.78, + "learning_rate": 4.01321537941104e-06, + "loss": 0.7795, + "step": 3773 + }, + { + "epoch": 0.78, + "learning_rate": 4.012289722114565e-06, + "loss": 0.9427, + "step": 3774 + }, + { + "epoch": 0.79, + "learning_rate": 4.011363956047634e-06, + "loss": 0.9194, + "step": 3775 + }, + { + "epoch": 0.79, + "learning_rate": 4.010438081309716e-06, + "loss": 0.8964, + "step": 3776 + }, + { + "epoch": 0.79, + "learning_rate": 4.009512098000301e-06, + "loss": 0.9051, + "step": 3777 + }, + { + "epoch": 0.79, + "learning_rate": 4.00858600621888e-06, + "loss": 0.8975, + "step": 3778 + }, + { + "epoch": 0.79, + "learning_rate": 4.0076598060649664e-06, + "loss": 1.1255, + "step": 3779 + }, + { + "epoch": 0.79, + "learning_rate": 4.0067334976380775e-06, + "loss": 1.0583, + "step": 3780 + }, + { + "epoch": 0.79, + "learning_rate": 4.005807081037748e-06, + "loss": 0.8134, + "step": 3781 + }, + { + "epoch": 0.79, + "learning_rate": 4.0048805563635175e-06, + "loss": 0.911, + "step": 3782 + }, + { + "epoch": 0.79, + "learning_rate": 4.003953923714944e-06, + "loss": 0.8437, + "step": 3783 + }, + { + "epoch": 0.79, + "learning_rate": 4.003027183191593e-06, + "loss": 0.9268, + "step": 3784 + }, + { + "epoch": 0.79, + "learning_rate": 4.002100334893042e-06, + "loss": 1.0368, + "step": 3785 + }, + { + "epoch": 0.79, + "learning_rate": 4.001173378918881e-06, + "loss": 0.8529, + "step": 3786 + }, + { + "epoch": 0.79, + "learning_rate": 4.000246315368714e-06, + "loss": 0.9483, + "step": 3787 + }, + { + "epoch": 0.79, + "learning_rate": 3.9993191443421505e-06, + "loss": 0.7421, + "step": 3788 + }, + { + "epoch": 0.79, + "learning_rate": 3.998391865938816e-06, + "loss": 1.0262, + "step": 3789 + }, + { + "epoch": 0.79, + "learning_rate": 3.997464480258347e-06, + "loss": 0.9229, + "step": 3790 + }, + { + "epoch": 0.79, + "learning_rate": 3.996536987400391e-06, + "loss": 0.9232, + "step": 3791 + }, + { + "epoch": 0.79, + "learning_rate": 3.9956093874646075e-06, + "loss": 0.7749, + "step": 3792 + }, + { + "epoch": 0.79, + "learning_rate": 3.994681680550665e-06, + "loss": 1.0376, + "step": 3793 + }, + { + "epoch": 0.79, + "learning_rate": 3.993753866758248e-06, + "loss": 0.811, + "step": 3794 + }, + { + "epoch": 0.79, + "learning_rate": 3.992825946187048e-06, + "loss": 0.8191, + "step": 3795 + }, + { + "epoch": 0.79, + "learning_rate": 3.991897918936771e-06, + "loss": 1.2268, + "step": 3796 + }, + { + "epoch": 0.79, + "learning_rate": 3.990969785107134e-06, + "loss": 0.9575, + "step": 3797 + }, + { + "epoch": 0.79, + "learning_rate": 3.990041544797864e-06, + "loss": 1.2313, + "step": 3798 + }, + { + "epoch": 0.79, + "learning_rate": 3.9891131981087e-06, + "loss": 1.0532, + "step": 3799 + }, + { + "epoch": 0.79, + "learning_rate": 3.988184745139395e-06, + "loss": 1.0772, + "step": 3800 + }, + { + "epoch": 0.79, + "learning_rate": 3.987256185989708e-06, + "loss": 0.9932, + "step": 3801 + }, + { + "epoch": 0.79, + "learning_rate": 3.986327520759415e-06, + "loss": 0.8713, + "step": 3802 + }, + { + "epoch": 0.79, + "learning_rate": 3.985398749548301e-06, + "loss": 1.06, + "step": 3803 + }, + { + "epoch": 0.79, + "learning_rate": 3.984469872456161e-06, + "loss": 1.0111, + "step": 3804 + }, + { + "epoch": 0.79, + "learning_rate": 3.983540889582802e-06, + "loss": 1.0816, + "step": 3805 + }, + { + "epoch": 0.79, + "learning_rate": 3.9826118010280475e-06, + "loss": 0.9222, + "step": 3806 + }, + { + "epoch": 0.79, + "learning_rate": 3.9816826068917226e-06, + "loss": 0.9359, + "step": 3807 + }, + { + "epoch": 0.79, + "learning_rate": 3.980753307273673e-06, + "loss": 1.2298, + "step": 3808 + }, + { + "epoch": 0.79, + "learning_rate": 3.97982390227375e-06, + "loss": 0.9393, + "step": 3809 + }, + { + "epoch": 0.79, + "learning_rate": 3.9788943919918195e-06, + "loss": 0.8787, + "step": 3810 + }, + { + "epoch": 0.79, + "learning_rate": 3.977964776527756e-06, + "loss": 0.7547, + "step": 3811 + }, + { + "epoch": 0.79, + "learning_rate": 3.977035055981446e-06, + "loss": 0.9754, + "step": 3812 + }, + { + "epoch": 0.79, + "learning_rate": 3.97610523045279e-06, + "loss": 0.9537, + "step": 3813 + }, + { + "epoch": 0.79, + "learning_rate": 3.975175300041696e-06, + "loss": 1.0461, + "step": 3814 + }, + { + "epoch": 0.79, + "learning_rate": 3.9742452648480865e-06, + "loss": 0.8756, + "step": 3815 + }, + { + "epoch": 0.79, + "learning_rate": 3.973315124971892e-06, + "loss": 0.9661, + "step": 3816 + }, + { + "epoch": 0.79, + "learning_rate": 3.972384880513057e-06, + "loss": 0.8536, + "step": 3817 + }, + { + "epoch": 0.79, + "learning_rate": 3.971454531571537e-06, + "loss": 0.9163, + "step": 3818 + }, + { + "epoch": 0.79, + "learning_rate": 3.970524078247295e-06, + "loss": 0.8901, + "step": 3819 + }, + { + "epoch": 0.79, + "learning_rate": 3.9695935206403115e-06, + "loss": 0.9448, + "step": 3820 + }, + { + "epoch": 0.79, + "learning_rate": 3.968662858850573e-06, + "loss": 1.0035, + "step": 3821 + }, + { + "epoch": 0.79, + "learning_rate": 3.967732092978079e-06, + "loss": 0.892, + "step": 3822 + }, + { + "epoch": 0.8, + "learning_rate": 3.966801223122841e-06, + "loss": 1.0656, + "step": 3823 + }, + { + "epoch": 0.8, + "learning_rate": 3.965870249384881e-06, + "loss": 0.8404, + "step": 3824 + }, + { + "epoch": 0.8, + "learning_rate": 3.964939171864231e-06, + "loss": 1.0061, + "step": 3825 + }, + { + "epoch": 0.8, + "learning_rate": 3.964007990660936e-06, + "loss": 0.8025, + "step": 3826 + }, + { + "epoch": 0.8, + "learning_rate": 3.963076705875051e-06, + "loss": 1.002, + "step": 3827 + }, + { + "epoch": 0.8, + "learning_rate": 3.962145317606643e-06, + "loss": 0.9586, + "step": 3828 + }, + { + "epoch": 0.8, + "learning_rate": 3.961213825955789e-06, + "loss": 0.8764, + "step": 3829 + }, + { + "epoch": 0.8, + "learning_rate": 3.960282231022577e-06, + "loss": 0.8868, + "step": 3830 + }, + { + "epoch": 0.8, + "learning_rate": 3.95935053290711e-06, + "loss": 1.0285, + "step": 3831 + }, + { + "epoch": 0.8, + "learning_rate": 3.9584187317094955e-06, + "loss": 1.0641, + "step": 3832 + }, + { + "epoch": 0.8, + "learning_rate": 3.957486827529858e-06, + "loss": 1.0307, + "step": 3833 + }, + { + "epoch": 0.8, + "learning_rate": 3.956554820468327e-06, + "loss": 1.1348, + "step": 3834 + }, + { + "epoch": 0.8, + "learning_rate": 3.95562271062505e-06, + "loss": 0.9925, + "step": 3835 + }, + { + "epoch": 0.8, + "learning_rate": 3.954690498100181e-06, + "loss": 0.9428, + "step": 3836 + }, + { + "epoch": 0.8, + "learning_rate": 3.953758182993886e-06, + "loss": 0.8575, + "step": 3837 + }, + { + "epoch": 0.8, + "learning_rate": 3.952825765406342e-06, + "loss": 0.8825, + "step": 3838 + }, + { + "epoch": 0.8, + "learning_rate": 3.9518932454377375e-06, + "loss": 0.8805, + "step": 3839 + }, + { + "epoch": 0.8, + "learning_rate": 3.950960623188272e-06, + "loss": 0.8299, + "step": 3840 + }, + { + "epoch": 0.8, + "learning_rate": 3.950027898758155e-06, + "loss": 0.7966, + "step": 3841 + }, + { + "epoch": 0.8, + "learning_rate": 3.949095072247608e-06, + "loss": 0.9059, + "step": 3842 + }, + { + "epoch": 0.8, + "learning_rate": 3.948162143756864e-06, + "loss": 0.8639, + "step": 3843 + }, + { + "epoch": 0.8, + "learning_rate": 3.947229113386164e-06, + "loss": 1.1212, + "step": 3844 + }, + { + "epoch": 0.8, + "learning_rate": 3.946295981235765e-06, + "loss": 0.8818, + "step": 3845 + }, + { + "epoch": 0.8, + "learning_rate": 3.945362747405929e-06, + "loss": 0.8763, + "step": 3846 + }, + { + "epoch": 0.8, + "learning_rate": 3.944429411996935e-06, + "loss": 1.0639, + "step": 3847 + }, + { + "epoch": 0.8, + "learning_rate": 3.9434959751090664e-06, + "loss": 0.8457, + "step": 3848 + }, + { + "epoch": 0.8, + "learning_rate": 3.942562436842623e-06, + "loss": 0.9986, + "step": 3849 + }, + { + "epoch": 0.8, + "learning_rate": 3.941628797297913e-06, + "loss": 1.0204, + "step": 3850 + }, + { + "epoch": 0.8, + "learning_rate": 3.940695056575257e-06, + "loss": 0.8287, + "step": 3851 + }, + { + "epoch": 0.8, + "learning_rate": 3.939761214774982e-06, + "loss": 1.0085, + "step": 3852 + }, + { + "epoch": 0.8, + "learning_rate": 3.938827271997434e-06, + "loss": 1.1025, + "step": 3853 + }, + { + "epoch": 0.8, + "learning_rate": 3.937893228342961e-06, + "loss": 0.7536, + "step": 3854 + }, + { + "epoch": 0.8, + "learning_rate": 3.936959083911928e-06, + "loss": 0.9094, + "step": 3855 + }, + { + "epoch": 0.8, + "learning_rate": 3.936024838804708e-06, + "loss": 0.9964, + "step": 3856 + }, + { + "epoch": 0.8, + "learning_rate": 3.935090493121686e-06, + "loss": 1.1451, + "step": 3857 + }, + { + "epoch": 0.8, + "learning_rate": 3.934156046963257e-06, + "loss": 1.0174, + "step": 3858 + }, + { + "epoch": 0.8, + "learning_rate": 3.933221500429828e-06, + "loss": 0.8157, + "step": 3859 + }, + { + "epoch": 0.8, + "learning_rate": 3.932286853621816e-06, + "loss": 1.1481, + "step": 3860 + }, + { + "epoch": 0.8, + "learning_rate": 3.931352106639647e-06, + "loss": 1.1517, + "step": 3861 + }, + { + "epoch": 0.8, + "learning_rate": 3.930417259583761e-06, + "loss": 1.0351, + "step": 3862 + }, + { + "epoch": 0.8, + "learning_rate": 3.929482312554607e-06, + "loss": 0.9193, + "step": 3863 + }, + { + "epoch": 0.8, + "learning_rate": 3.928547265652644e-06, + "loss": 1.0855, + "step": 3864 + }, + { + "epoch": 0.8, + "learning_rate": 3.927612118978345e-06, + "loss": 0.9827, + "step": 3865 + }, + { + "epoch": 0.8, + "learning_rate": 3.926676872632188e-06, + "loss": 0.9447, + "step": 3866 + }, + { + "epoch": 0.8, + "learning_rate": 3.92574152671467e-06, + "loss": 0.8476, + "step": 3867 + }, + { + "epoch": 0.8, + "learning_rate": 3.9248060813262886e-06, + "loss": 1.1827, + "step": 3868 + }, + { + "epoch": 0.8, + "learning_rate": 3.923870536567561e-06, + "loss": 1.0924, + "step": 3869 + }, + { + "epoch": 0.8, + "learning_rate": 3.9229348925390095e-06, + "loss": 1.013, + "step": 3870 + }, + { + "epoch": 0.81, + "learning_rate": 3.92199914934117e-06, + "loss": 0.7901, + "step": 3871 + }, + { + "epoch": 0.81, + "learning_rate": 3.921063307074587e-06, + "loss": 0.9834, + "step": 3872 + }, + { + "epoch": 0.81, + "learning_rate": 3.920127365839818e-06, + "loss": 1.0297, + "step": 3873 + }, + { + "epoch": 0.81, + "learning_rate": 3.919191325737429e-06, + "loss": 0.8885, + "step": 3874 + }, + { + "epoch": 0.81, + "learning_rate": 3.918255186867998e-06, + "loss": 1.0376, + "step": 3875 + }, + { + "epoch": 0.81, + "learning_rate": 3.917318949332112e-06, + "loss": 0.8614, + "step": 3876 + }, + { + "epoch": 0.81, + "learning_rate": 3.916382613230371e-06, + "loss": 0.9026, + "step": 3877 + }, + { + "epoch": 0.81, + "learning_rate": 3.915446178663383e-06, + "loss": 1.0544, + "step": 3878 + }, + { + "epoch": 0.81, + "learning_rate": 3.914509645731769e-06, + "loss": 1.0426, + "step": 3879 + }, + { + "epoch": 0.81, + "learning_rate": 3.913573014536159e-06, + "loss": 0.829, + "step": 3880 + }, + { + "epoch": 0.81, + "learning_rate": 3.912636285177194e-06, + "loss": 0.8553, + "step": 3881 + }, + { + "epoch": 0.81, + "learning_rate": 3.911699457755524e-06, + "loss": 0.9856, + "step": 3882 + }, + { + "epoch": 0.81, + "learning_rate": 3.9107625323718135e-06, + "loss": 0.7343, + "step": 3883 + }, + { + "epoch": 0.81, + "learning_rate": 3.909825509126733e-06, + "loss": 0.9011, + "step": 3884 + }, + { + "epoch": 0.81, + "learning_rate": 3.908888388120968e-06, + "loss": 1.358, + "step": 3885 + }, + { + "epoch": 0.81, + "learning_rate": 3.907951169455208e-06, + "loss": 0.7838, + "step": 3886 + }, + { + "epoch": 0.81, + "learning_rate": 3.907013853230162e-06, + "loss": 0.9331, + "step": 3887 + }, + { + "epoch": 0.81, + "learning_rate": 3.906076439546541e-06, + "loss": 0.8857, + "step": 3888 + }, + { + "epoch": 0.81, + "learning_rate": 3.905138928505072e-06, + "loss": 0.817, + "step": 3889 + }, + { + "epoch": 0.81, + "learning_rate": 3.904201320206488e-06, + "loss": 0.8566, + "step": 3890 + }, + { + "epoch": 0.81, + "learning_rate": 3.903263614751539e-06, + "loss": 0.8787, + "step": 3891 + }, + { + "epoch": 0.81, + "learning_rate": 3.902325812240977e-06, + "loss": 0.7788, + "step": 3892 + }, + { + "epoch": 0.81, + "learning_rate": 3.901387912775572e-06, + "loss": 0.8107, + "step": 3893 + }, + { + "epoch": 0.81, + "learning_rate": 3.9004499164561e-06, + "loss": 0.8029, + "step": 3894 + }, + { + "epoch": 0.81, + "learning_rate": 3.899511823383348e-06, + "loss": 0.9557, + "step": 3895 + }, + { + "epoch": 0.81, + "learning_rate": 3.898573633658116e-06, + "loss": 1.012, + "step": 3896 + }, + { + "epoch": 0.81, + "learning_rate": 3.897635347381211e-06, + "loss": 0.9658, + "step": 3897 + }, + { + "epoch": 0.81, + "learning_rate": 3.896696964653451e-06, + "loss": 1.1055, + "step": 3898 + }, + { + "epoch": 0.81, + "learning_rate": 3.8957584855756666e-06, + "loss": 0.8522, + "step": 3899 + }, + { + "epoch": 0.81, + "learning_rate": 3.894819910248697e-06, + "loss": 0.8496, + "step": 3900 + }, + { + "epoch": 0.81, + "learning_rate": 3.893881238773392e-06, + "loss": 0.9057, + "step": 3901 + }, + { + "epoch": 0.81, + "learning_rate": 3.892942471250612e-06, + "loss": 0.8052, + "step": 3902 + }, + { + "epoch": 0.81, + "learning_rate": 3.892003607781226e-06, + "loss": 0.9694, + "step": 3903 + }, + { + "epoch": 0.81, + "learning_rate": 3.891064648466117e-06, + "loss": 0.8927, + "step": 3904 + }, + { + "epoch": 0.81, + "learning_rate": 3.8901255934061745e-06, + "loss": 0.8468, + "step": 3905 + }, + { + "epoch": 0.81, + "learning_rate": 3.889186442702301e-06, + "loss": 0.8252, + "step": 3906 + }, + { + "epoch": 0.81, + "learning_rate": 3.888247196455407e-06, + "loss": 0.9465, + "step": 3907 + }, + { + "epoch": 0.81, + "learning_rate": 3.8873078547664165e-06, + "loss": 0.8557, + "step": 3908 + }, + { + "epoch": 0.81, + "learning_rate": 3.886368417736259e-06, + "loss": 0.8883, + "step": 3909 + }, + { + "epoch": 0.81, + "learning_rate": 3.885428885465879e-06, + "loss": 0.8395, + "step": 3910 + }, + { + "epoch": 0.81, + "learning_rate": 3.884489258056227e-06, + "loss": 0.7982, + "step": 3911 + }, + { + "epoch": 0.81, + "learning_rate": 3.88354953560827e-06, + "loss": 1.0204, + "step": 3912 + }, + { + "epoch": 0.81, + "learning_rate": 3.882609718222976e-06, + "loss": 1.2195, + "step": 3913 + }, + { + "epoch": 0.81, + "learning_rate": 3.881669806001332e-06, + "loss": 0.8069, + "step": 3914 + }, + { + "epoch": 0.81, + "learning_rate": 3.88072979904433e-06, + "loss": 0.8882, + "step": 3915 + }, + { + "epoch": 0.81, + "learning_rate": 3.8797896974529745e-06, + "loss": 0.8832, + "step": 3916 + }, + { + "epoch": 0.81, + "learning_rate": 3.878849501328277e-06, + "loss": 0.8312, + "step": 3917 + }, + { + "epoch": 0.81, + "learning_rate": 3.877909210771265e-06, + "loss": 0.981, + "step": 3918 + }, + { + "epoch": 0.82, + "learning_rate": 3.87696882588297e-06, + "loss": 1.0386, + "step": 3919 + }, + { + "epoch": 0.82, + "learning_rate": 3.876028346764438e-06, + "loss": 1.0622, + "step": 3920 + }, + { + "epoch": 0.82, + "learning_rate": 3.875087773516722e-06, + "loss": 0.8422, + "step": 3921 + }, + { + "epoch": 0.82, + "learning_rate": 3.874147106240888e-06, + "loss": 1.1257, + "step": 3922 + }, + { + "epoch": 0.82, + "learning_rate": 3.873206345038009e-06, + "loss": 1.1398, + "step": 3923 + }, + { + "epoch": 0.82, + "learning_rate": 3.872265490009171e-06, + "loss": 1.0428, + "step": 3924 + }, + { + "epoch": 0.82, + "learning_rate": 3.871324541255468e-06, + "loss": 0.8139, + "step": 3925 + }, + { + "epoch": 0.82, + "learning_rate": 3.870383498878006e-06, + "loss": 0.836, + "step": 3926 + }, + { + "epoch": 0.82, + "learning_rate": 3.869442362977898e-06, + "loss": 1.0325, + "step": 3927 + }, + { + "epoch": 0.82, + "learning_rate": 3.868501133656271e-06, + "loss": 1.0455, + "step": 3928 + }, + { + "epoch": 0.82, + "learning_rate": 3.867559811014258e-06, + "loss": 0.904, + "step": 3929 + }, + { + "epoch": 0.82, + "learning_rate": 3.866618395153007e-06, + "loss": 1.1511, + "step": 3930 + }, + { + "epoch": 0.82, + "learning_rate": 3.86567688617367e-06, + "loss": 1.0719, + "step": 3931 + }, + { + "epoch": 0.82, + "learning_rate": 3.864735284177414e-06, + "loss": 0.792, + "step": 3932 + }, + { + "epoch": 0.82, + "learning_rate": 3.863793589265412e-06, + "loss": 1.0233, + "step": 3933 + }, + { + "epoch": 0.82, + "learning_rate": 3.8628518015388526e-06, + "loss": 1.089, + "step": 3934 + }, + { + "epoch": 0.82, + "learning_rate": 3.861909921098928e-06, + "loss": 0.7632, + "step": 3935 + }, + { + "epoch": 0.82, + "learning_rate": 3.860967948046843e-06, + "loss": 1.0903, + "step": 3936 + }, + { + "epoch": 0.82, + "learning_rate": 3.860025882483814e-06, + "loss": 0.9723, + "step": 3937 + }, + { + "epoch": 0.82, + "learning_rate": 3.859083724511066e-06, + "loss": 0.8795, + "step": 3938 + }, + { + "epoch": 0.82, + "learning_rate": 3.858141474229831e-06, + "loss": 0.8388, + "step": 3939 + }, + { + "epoch": 0.82, + "learning_rate": 3.8571991317413576e-06, + "loss": 0.9238, + "step": 3940 + }, + { + "epoch": 0.82, + "learning_rate": 3.856256697146898e-06, + "loss": 1.0554, + "step": 3941 + }, + { + "epoch": 0.82, + "learning_rate": 3.855314170547718e-06, + "loss": 0.9815, + "step": 3942 + }, + { + "epoch": 0.82, + "learning_rate": 3.854371552045092e-06, + "loss": 1.0006, + "step": 3943 + }, + { + "epoch": 0.82, + "learning_rate": 3.853428841740302e-06, + "loss": 0.9339, + "step": 3944 + }, + { + "epoch": 0.82, + "learning_rate": 3.852486039734644e-06, + "loss": 1.2821, + "step": 3945 + }, + { + "epoch": 0.82, + "learning_rate": 3.851543146129423e-06, + "loss": 0.791, + "step": 3946 + }, + { + "epoch": 0.82, + "learning_rate": 3.8506001610259515e-06, + "loss": 0.9255, + "step": 3947 + }, + { + "epoch": 0.82, + "learning_rate": 3.849657084525554e-06, + "loss": 0.8988, + "step": 3948 + }, + { + "epoch": 0.82, + "learning_rate": 3.8487139167295615e-06, + "loss": 1.0086, + "step": 3949 + }, + { + "epoch": 0.82, + "learning_rate": 3.8477706577393204e-06, + "loss": 0.847, + "step": 3950 + }, + { + "epoch": 0.82, + "learning_rate": 3.846827307656182e-06, + "loss": 0.7646, + "step": 3951 + }, + { + "epoch": 0.82, + "learning_rate": 3.8458838665815114e-06, + "loss": 0.921, + "step": 3952 + }, + { + "epoch": 0.82, + "learning_rate": 3.844940334616678e-06, + "loss": 0.8931, + "step": 3953 + }, + { + "epoch": 0.82, + "learning_rate": 3.843996711863067e-06, + "loss": 0.9235, + "step": 3954 + }, + { + "epoch": 0.82, + "learning_rate": 3.8430529984220685e-06, + "loss": 0.9882, + "step": 3955 + }, + { + "epoch": 0.82, + "learning_rate": 3.842109194395087e-06, + "loss": 1.102, + "step": 3956 + }, + { + "epoch": 0.82, + "learning_rate": 3.84116529988353e-06, + "loss": 1.0022, + "step": 3957 + }, + { + "epoch": 0.82, + "learning_rate": 3.840221314988824e-06, + "loss": 0.9483, + "step": 3958 + }, + { + "epoch": 0.82, + "learning_rate": 3.839277239812396e-06, + "loss": 0.8593, + "step": 3959 + }, + { + "epoch": 0.82, + "learning_rate": 3.83833307445569e-06, + "loss": 0.8057, + "step": 3960 + }, + { + "epoch": 0.82, + "learning_rate": 3.837388819020154e-06, + "loss": 1.1992, + "step": 3961 + }, + { + "epoch": 0.82, + "learning_rate": 3.83644447360725e-06, + "loss": 1.0125, + "step": 3962 + }, + { + "epoch": 0.82, + "learning_rate": 3.8355000383184466e-06, + "loss": 0.8311, + "step": 3963 + }, + { + "epoch": 0.82, + "learning_rate": 3.8345555132552224e-06, + "loss": 0.6868, + "step": 3964 + }, + { + "epoch": 0.82, + "learning_rate": 3.83361089851907e-06, + "loss": 0.8084, + "step": 3965 + }, + { + "epoch": 0.82, + "learning_rate": 3.832666194211484e-06, + "loss": 0.8894, + "step": 3966 + }, + { + "epoch": 0.83, + "learning_rate": 3.831721400433976e-06, + "loss": 0.9127, + "step": 3967 + }, + { + "epoch": 0.83, + "learning_rate": 3.830776517288063e-06, + "loss": 0.7486, + "step": 3968 + }, + { + "epoch": 0.83, + "learning_rate": 3.829831544875273e-06, + "loss": 1.0235, + "step": 3969 + }, + { + "epoch": 0.83, + "learning_rate": 3.828886483297142e-06, + "loss": 0.9726, + "step": 3970 + }, + { + "epoch": 0.83, + "learning_rate": 3.827941332655217e-06, + "loss": 0.997, + "step": 3971 + }, + { + "epoch": 0.83, + "learning_rate": 3.826996093051056e-06, + "loss": 1.0046, + "step": 3972 + }, + { + "epoch": 0.83, + "learning_rate": 3.826050764586223e-06, + "loss": 1.1332, + "step": 3973 + }, + { + "epoch": 0.83, + "learning_rate": 3.825105347362295e-06, + "loss": 0.9745, + "step": 3974 + }, + { + "epoch": 0.83, + "learning_rate": 3.824159841480856e-06, + "loss": 0.7514, + "step": 3975 + }, + { + "epoch": 0.83, + "learning_rate": 3.8232142470435e-06, + "loss": 1.0914, + "step": 3976 + }, + { + "epoch": 0.83, + "learning_rate": 3.822268564151833e-06, + "loss": 0.7727, + "step": 3977 + }, + { + "epoch": 0.83, + "learning_rate": 3.821322792907466e-06, + "loss": 1.1581, + "step": 3978 + }, + { + "epoch": 0.83, + "learning_rate": 3.820376933412026e-06, + "loss": 1.0128, + "step": 3979 + }, + { + "epoch": 0.83, + "learning_rate": 3.81943098576714e-06, + "loss": 0.9484, + "step": 3980 + }, + { + "epoch": 0.83, + "learning_rate": 3.818484950074456e-06, + "loss": 1.034, + "step": 3981 + }, + { + "epoch": 0.83, + "learning_rate": 3.817538826435621e-06, + "loss": 1.0288, + "step": 3982 + }, + { + "epoch": 0.83, + "learning_rate": 3.816592614952298e-06, + "loss": 0.8392, + "step": 3983 + }, + { + "epoch": 0.83, + "learning_rate": 3.815646315726155e-06, + "loss": 1.0208, + "step": 3984 + }, + { + "epoch": 0.83, + "learning_rate": 3.814699928858875e-06, + "loss": 1.0332, + "step": 3985 + }, + { + "epoch": 0.83, + "learning_rate": 3.8137534544521463e-06, + "loss": 0.9151, + "step": 3986 + }, + { + "epoch": 0.83, + "learning_rate": 3.8128068926076663e-06, + "loss": 0.8192, + "step": 3987 + }, + { + "epoch": 0.83, + "learning_rate": 3.811860243427144e-06, + "loss": 0.9774, + "step": 3988 + }, + { + "epoch": 0.83, + "learning_rate": 3.810913507012296e-06, + "loss": 0.8152, + "step": 3989 + }, + { + "epoch": 0.83, + "learning_rate": 3.8099666834648503e-06, + "loss": 1.0455, + "step": 3990 + }, + { + "epoch": 0.83, + "learning_rate": 3.8090197728865415e-06, + "loss": 0.9135, + "step": 3991 + }, + { + "epoch": 0.83, + "learning_rate": 3.808072775379116e-06, + "loss": 0.8247, + "step": 3992 + }, + { + "epoch": 0.83, + "learning_rate": 3.807125691044329e-06, + "loss": 1.0436, + "step": 3993 + }, + { + "epoch": 0.83, + "learning_rate": 3.8061785199839433e-06, + "loss": 0.8997, + "step": 3994 + }, + { + "epoch": 0.83, + "learning_rate": 3.8052312622997337e-06, + "loss": 1.2254, + "step": 3995 + }, + { + "epoch": 0.83, + "learning_rate": 3.8042839180934824e-06, + "loss": 0.8059, + "step": 3996 + }, + { + "epoch": 0.83, + "learning_rate": 3.8033364874669815e-06, + "loss": 0.8674, + "step": 3997 + }, + { + "epoch": 0.83, + "learning_rate": 3.8023889705220313e-06, + "loss": 1.0595, + "step": 3998 + }, + { + "epoch": 0.83, + "learning_rate": 3.801441367360445e-06, + "loss": 0.8738, + "step": 3999 + }, + { + "epoch": 0.83, + "learning_rate": 3.8004936780840405e-06, + "loss": 0.8604, + "step": 4000 + }, + { + "epoch": 0.83, + "eval_loss": NaN, + "eval_runtime": 15.0493, + "eval_samples_per_second": 351.843, + "eval_steps_per_second": 43.989, + "step": 4000 + }, + { + "epoch": 0.83, + "learning_rate": 3.7995459027946474e-06, + "loss": 1.0068, + "step": 4001 + }, + { + "epoch": 0.83, + "learning_rate": 3.7985980415941033e-06, + "loss": 1.1087, + "step": 4002 + }, + { + "epoch": 0.83, + "learning_rate": 3.7976500945842576e-06, + "loss": 0.9803, + "step": 4003 + }, + { + "epoch": 0.83, + "learning_rate": 3.7967020618669655e-06, + "loss": 0.982, + "step": 4004 + }, + { + "epoch": 0.83, + "learning_rate": 3.7957539435440946e-06, + "loss": 0.9607, + "step": 4005 + }, + { + "epoch": 0.83, + "learning_rate": 3.7948057397175185e-06, + "loss": 0.9886, + "step": 4006 + }, + { + "epoch": 0.83, + "learning_rate": 3.7938574504891224e-06, + "loss": 0.9852, + "step": 4007 + }, + { + "epoch": 0.83, + "learning_rate": 3.7929090759608e-06, + "loss": 0.9313, + "step": 4008 + }, + { + "epoch": 0.83, + "learning_rate": 3.791960616234454e-06, + "loss": 0.7548, + "step": 4009 + }, + { + "epoch": 0.83, + "learning_rate": 3.791012071411996e-06, + "loss": 1.0853, + "step": 4010 + }, + { + "epoch": 0.83, + "learning_rate": 3.7900634415953483e-06, + "loss": 0.8809, + "step": 4011 + }, + { + "epoch": 0.83, + "learning_rate": 3.7891147268864387e-06, + "loss": 0.9649, + "step": 4012 + }, + { + "epoch": 0.83, + "learning_rate": 3.78816592738721e-06, + "loss": 0.9716, + "step": 4013 + }, + { + "epoch": 0.83, + "learning_rate": 3.7872170431996067e-06, + "loss": 0.9801, + "step": 4014 + }, + { + "epoch": 0.84, + "learning_rate": 3.78626807442559e-06, + "loss": 0.8498, + "step": 4015 + }, + { + "epoch": 0.84, + "learning_rate": 3.7853190211671237e-06, + "loss": 0.9078, + "step": 4016 + }, + { + "epoch": 0.84, + "learning_rate": 3.7843698835261848e-06, + "loss": 1.0606, + "step": 4017 + }, + { + "epoch": 0.84, + "learning_rate": 3.7834206616047573e-06, + "loss": 0.9788, + "step": 4018 + }, + { + "epoch": 0.84, + "learning_rate": 3.782471355504837e-06, + "loss": 0.8211, + "step": 4019 + }, + { + "epoch": 0.84, + "learning_rate": 3.781521965328424e-06, + "loss": 1.0115, + "step": 4020 + }, + { + "epoch": 0.84, + "learning_rate": 3.7805724911775327e-06, + "loss": 0.912, + "step": 4021 + }, + { + "epoch": 0.84, + "learning_rate": 3.779622933154182e-06, + "loss": 0.9372, + "step": 4022 + }, + { + "epoch": 0.84, + "learning_rate": 3.7786732913604038e-06, + "loss": 0.9856, + "step": 4023 + }, + { + "epoch": 0.84, + "learning_rate": 3.7777235658982345e-06, + "loss": 0.8821, + "step": 4024 + }, + { + "epoch": 0.84, + "learning_rate": 3.776773756869725e-06, + "loss": 0.9869, + "step": 4025 + }, + { + "epoch": 0.84, + "learning_rate": 3.7758238643769297e-06, + "loss": 0.8875, + "step": 4026 + }, + { + "epoch": 0.84, + "learning_rate": 3.7748738885219157e-06, + "loss": 0.8873, + "step": 4027 + }, + { + "epoch": 0.84, + "learning_rate": 3.7739238294067587e-06, + "loss": 0.87, + "step": 4028 + }, + { + "epoch": 0.84, + "learning_rate": 3.77297368713354e-06, + "loss": 1.078, + "step": 4029 + }, + { + "epoch": 0.84, + "learning_rate": 3.772023461804355e-06, + "loss": 0.9641, + "step": 4030 + }, + { + "epoch": 0.84, + "learning_rate": 3.7710731535213036e-06, + "loss": 0.8619, + "step": 4031 + }, + { + "epoch": 0.84, + "learning_rate": 3.770122762386497e-06, + "loss": 0.9749, + "step": 4032 + }, + { + "epoch": 0.84, + "learning_rate": 3.7691722885020543e-06, + "loss": 0.7779, + "step": 4033 + }, + { + "epoch": 0.84, + "learning_rate": 3.7682217319701042e-06, + "loss": 1.0465, + "step": 4034 + }, + { + "epoch": 0.84, + "learning_rate": 3.7672710928927834e-06, + "loss": 0.8022, + "step": 4035 + }, + { + "epoch": 0.84, + "learning_rate": 3.7663203713722394e-06, + "loss": 1.1332, + "step": 4036 + }, + { + "epoch": 0.84, + "learning_rate": 3.765369567510626e-06, + "loss": 1.1939, + "step": 4037 + }, + { + "epoch": 0.84, + "learning_rate": 3.764418681410107e-06, + "loss": 1.034, + "step": 4038 + }, + { + "epoch": 0.84, + "learning_rate": 3.763467713172854e-06, + "loss": 1.0067, + "step": 4039 + }, + { + "epoch": 0.84, + "learning_rate": 3.7625166629010517e-06, + "loss": 0.8228, + "step": 4040 + }, + { + "epoch": 0.84, + "learning_rate": 3.761565530696887e-06, + "loss": 0.7287, + "step": 4041 + }, + { + "epoch": 0.84, + "learning_rate": 3.760614316662561e-06, + "loss": 1.0984, + "step": 4042 + }, + { + "epoch": 0.84, + "learning_rate": 3.7596630209002807e-06, + "loss": 1.0487, + "step": 4043 + }, + { + "epoch": 0.84, + "learning_rate": 3.7587116435122636e-06, + "loss": 0.8251, + "step": 4044 + }, + { + "epoch": 0.84, + "learning_rate": 3.757760184600734e-06, + "loss": 1.0126, + "step": 4045 + }, + { + "epoch": 0.84, + "learning_rate": 3.756808644267927e-06, + "loss": 1.045, + "step": 4046 + }, + { + "epoch": 0.84, + "learning_rate": 3.7558570226160844e-06, + "loss": 0.9922, + "step": 4047 + }, + { + "epoch": 0.84, + "learning_rate": 3.754905319747459e-06, + "loss": 1.0089, + "step": 4048 + }, + { + "epoch": 0.84, + "learning_rate": 3.7539535357643105e-06, + "loss": 1.1247, + "step": 4049 + }, + { + "epoch": 0.84, + "learning_rate": 3.7530016707689096e-06, + "loss": 1.032, + "step": 4050 + }, + { + "epoch": 0.84, + "learning_rate": 3.752049724863532e-06, + "loss": 0.8223, + "step": 4051 + }, + { + "epoch": 0.84, + "learning_rate": 3.7510976981504655e-06, + "loss": 1.0056, + "step": 4052 + }, + { + "epoch": 0.84, + "learning_rate": 3.7501455907320045e-06, + "loss": 0.9726, + "step": 4053 + }, + { + "epoch": 0.84, + "learning_rate": 3.7491934027104536e-06, + "loss": 0.9594, + "step": 4054 + }, + { + "epoch": 0.84, + "learning_rate": 3.7482411341881246e-06, + "loss": 0.7893, + "step": 4055 + }, + { + "epoch": 0.84, + "learning_rate": 3.7472887852673397e-06, + "loss": 0.7602, + "step": 4056 + }, + { + "epoch": 0.84, + "learning_rate": 3.746336356050428e-06, + "loss": 0.8603, + "step": 4057 + }, + { + "epoch": 0.84, + "learning_rate": 3.7453838466397286e-06, + "loss": 0.9778, + "step": 4058 + }, + { + "epoch": 0.84, + "learning_rate": 3.744431257137587e-06, + "loss": 0.8884, + "step": 4059 + }, + { + "epoch": 0.84, + "learning_rate": 3.7434785876463604e-06, + "loss": 0.9505, + "step": 4060 + }, + { + "epoch": 0.84, + "learning_rate": 3.742525838268413e-06, + "loss": 0.8615, + "step": 4061 + }, + { + "epoch": 0.84, + "learning_rate": 3.7415730091061173e-06, + "loss": 0.9678, + "step": 4062 + }, + { + "epoch": 0.85, + "learning_rate": 3.740620100261854e-06, + "loss": 0.7841, + "step": 4063 + }, + { + "epoch": 0.85, + "learning_rate": 3.739667111838014e-06, + "loss": 1.0094, + "step": 4064 + }, + { + "epoch": 0.85, + "learning_rate": 3.738714043936996e-06, + "loss": 0.8817, + "step": 4065 + }, + { + "epoch": 0.85, + "learning_rate": 3.7377608966612063e-06, + "loss": 0.9775, + "step": 4066 + }, + { + "epoch": 0.85, + "learning_rate": 3.7368076701130607e-06, + "loss": 1.0554, + "step": 4067 + }, + { + "epoch": 0.85, + "learning_rate": 3.735854364394984e-06, + "loss": 0.8733, + "step": 4068 + }, + { + "epoch": 0.85, + "learning_rate": 3.7349009796094087e-06, + "loss": 0.9476, + "step": 4069 + }, + { + "epoch": 0.85, + "learning_rate": 3.733947515858774e-06, + "loss": 0.9747, + "step": 4070 + }, + { + "epoch": 0.85, + "learning_rate": 3.732993973245533e-06, + "loss": 1.1685, + "step": 4071 + }, + { + "epoch": 0.85, + "learning_rate": 3.7320403518721402e-06, + "loss": 0.9597, + "step": 4072 + }, + { + "epoch": 0.85, + "learning_rate": 3.7310866518410653e-06, + "loss": 0.7094, + "step": 4073 + }, + { + "epoch": 0.85, + "learning_rate": 3.7301328732547814e-06, + "loss": 0.9585, + "step": 4074 + }, + { + "epoch": 0.85, + "learning_rate": 3.7291790162157722e-06, + "loss": 1.2543, + "step": 4075 + }, + { + "epoch": 0.85, + "learning_rate": 3.7282250808265293e-06, + "loss": 0.8419, + "step": 4076 + }, + { + "epoch": 0.85, + "learning_rate": 3.7272710671895546e-06, + "loss": 0.8111, + "step": 4077 + }, + { + "epoch": 0.85, + "learning_rate": 3.726316975407354e-06, + "loss": 0.8578, + "step": 4078 + }, + { + "epoch": 0.85, + "learning_rate": 3.7253628055824474e-06, + "loss": 1.2262, + "step": 4079 + }, + { + "epoch": 0.85, + "learning_rate": 3.724408557817359e-06, + "loss": 1.0303, + "step": 4080 + }, + { + "epoch": 0.85, + "learning_rate": 3.723454232214623e-06, + "loss": 0.9475, + "step": 4081 + }, + { + "epoch": 0.85, + "learning_rate": 3.72249982887678e-06, + "loss": 0.9208, + "step": 4082 + }, + { + "epoch": 0.85, + "learning_rate": 3.7215453479063834e-06, + "loss": 0.9444, + "step": 4083 + }, + { + "epoch": 0.85, + "learning_rate": 3.72059078940599e-06, + "loss": 0.9563, + "step": 4084 + }, + { + "epoch": 0.85, + "learning_rate": 3.719636153478168e-06, + "loss": 0.8827, + "step": 4085 + }, + { + "epoch": 0.85, + "learning_rate": 3.718681440225492e-06, + "loss": 0.9106, + "step": 4086 + }, + { + "epoch": 0.85, + "learning_rate": 3.7177266497505474e-06, + "loss": 0.7753, + "step": 4087 + }, + { + "epoch": 0.85, + "learning_rate": 3.716771782155925e-06, + "loss": 0.8751, + "step": 4088 + }, + { + "epoch": 0.85, + "learning_rate": 3.715816837544226e-06, + "loss": 0.9886, + "step": 4089 + }, + { + "epoch": 0.85, + "learning_rate": 3.7148618160180585e-06, + "loss": 1.1248, + "step": 4090 + }, + { + "epoch": 0.85, + "learning_rate": 3.7139067176800402e-06, + "loss": 0.9552, + "step": 4091 + }, + { + "epoch": 0.85, + "learning_rate": 3.712951542632796e-06, + "loss": 0.8236, + "step": 4092 + }, + { + "epoch": 0.85, + "learning_rate": 3.7119962909789593e-06, + "loss": 0.9933, + "step": 4093 + }, + { + "epoch": 0.85, + "learning_rate": 3.711040962821172e-06, + "loss": 0.9507, + "step": 4094 + }, + { + "epoch": 0.85, + "learning_rate": 3.7100855582620845e-06, + "loss": 0.7273, + "step": 4095 + }, + { + "epoch": 0.85, + "learning_rate": 3.709130077404353e-06, + "loss": 0.9527, + "step": 4096 + }, + { + "epoch": 0.85, + "learning_rate": 3.708174520350647e-06, + "loss": 1.0169, + "step": 4097 + }, + { + "epoch": 0.85, + "learning_rate": 3.707218887203638e-06, + "loss": 0.7894, + "step": 4098 + }, + { + "epoch": 0.85, + "learning_rate": 3.706263178066011e-06, + "loss": 0.9018, + "step": 4099 + }, + { + "epoch": 0.85, + "learning_rate": 3.7053073930404547e-06, + "loss": 0.9593, + "step": 4100 + }, + { + "epoch": 0.85, + "learning_rate": 3.7043515322296704e-06, + "loss": 1.0466, + "step": 4101 + }, + { + "epoch": 0.85, + "learning_rate": 3.703395595736364e-06, + "loss": 0.973, + "step": 4102 + }, + { + "epoch": 0.85, + "learning_rate": 3.7024395836632514e-06, + "loss": 0.7311, + "step": 4103 + }, + { + "epoch": 0.85, + "learning_rate": 3.701483496113055e-06, + "loss": 0.7305, + "step": 4104 + }, + { + "epoch": 0.85, + "learning_rate": 3.7005273331885075e-06, + "loss": 1.1698, + "step": 4105 + }, + { + "epoch": 0.85, + "learning_rate": 3.6995710949923475e-06, + "loss": 0.9429, + "step": 4106 + }, + { + "epoch": 0.85, + "learning_rate": 3.698614781627324e-06, + "loss": 0.9108, + "step": 4107 + }, + { + "epoch": 0.85, + "learning_rate": 3.697658393196191e-06, + "loss": 1.0381, + "step": 4108 + }, + { + "epoch": 0.85, + "learning_rate": 3.6967019298017144e-06, + "loss": 0.8929, + "step": 4109 + }, + { + "epoch": 0.85, + "learning_rate": 3.695745391546665e-06, + "loss": 0.8746, + "step": 4110 + }, + { + "epoch": 0.86, + "learning_rate": 3.6947887785338225e-06, + "loss": 0.9581, + "step": 4111 + }, + { + "epoch": 0.86, + "learning_rate": 3.6938320908659754e-06, + "loss": 0.7451, + "step": 4112 + }, + { + "epoch": 0.86, + "learning_rate": 3.69287532864592e-06, + "loss": 0.9989, + "step": 4113 + }, + { + "epoch": 0.86, + "learning_rate": 3.69191849197646e-06, + "loss": 0.9757, + "step": 4114 + }, + { + "epoch": 0.86, + "learning_rate": 3.6909615809604077e-06, + "loss": 1.0695, + "step": 4115 + }, + { + "epoch": 0.86, + "learning_rate": 3.6900045957005815e-06, + "loss": 0.7498, + "step": 4116 + }, + { + "epoch": 0.86, + "learning_rate": 3.6890475362998118e-06, + "loss": 0.8496, + "step": 4117 + }, + { + "epoch": 0.86, + "learning_rate": 3.6880904028609336e-06, + "loss": 1.0019, + "step": 4118 + }, + { + "epoch": 0.86, + "learning_rate": 3.6871331954867903e-06, + "loss": 0.7593, + "step": 4119 + }, + { + "epoch": 0.86, + "learning_rate": 3.6861759142802337e-06, + "loss": 0.7554, + "step": 4120 + }, + { + "epoch": 0.86, + "learning_rate": 3.685218559344125e-06, + "loss": 1.0455, + "step": 4121 + }, + { + "epoch": 0.86, + "learning_rate": 3.68426113078133e-06, + "loss": 1.0572, + "step": 4122 + }, + { + "epoch": 0.86, + "learning_rate": 3.6833036286947267e-06, + "loss": 1.035, + "step": 4123 + }, + { + "epoch": 0.86, + "learning_rate": 3.682346053187196e-06, + "loss": 0.7394, + "step": 4124 + }, + { + "epoch": 0.86, + "learning_rate": 3.6813884043616313e-06, + "loss": 1.1489, + "step": 4125 + }, + { + "epoch": 0.86, + "learning_rate": 3.680430682320931e-06, + "loss": 1.1311, + "step": 4126 + }, + { + "epoch": 0.86, + "learning_rate": 3.6794728871680023e-06, + "loss": 1.0837, + "step": 4127 + }, + { + "epoch": 0.86, + "learning_rate": 3.6785150190057602e-06, + "loss": 0.82, + "step": 4128 + }, + { + "epoch": 0.86, + "learning_rate": 3.6775570779371287e-06, + "loss": 0.9707, + "step": 4129 + }, + { + "epoch": 0.86, + "learning_rate": 3.6765990640650365e-06, + "loss": 1.0821, + "step": 4130 + }, + { + "epoch": 0.86, + "learning_rate": 3.6756409774924238e-06, + "loss": 0.9289, + "step": 4131 + }, + { + "epoch": 0.86, + "learning_rate": 3.6746828183222353e-06, + "loss": 0.9753, + "step": 4132 + }, + { + "epoch": 0.86, + "learning_rate": 3.6737245866574273e-06, + "loss": 0.9821, + "step": 4133 + }, + { + "epoch": 0.86, + "learning_rate": 3.67276628260096e-06, + "loss": 1.0676, + "step": 4134 + }, + { + "epoch": 0.86, + "learning_rate": 3.6718079062558033e-06, + "loss": 1.0897, + "step": 4135 + }, + { + "epoch": 0.86, + "learning_rate": 3.670849457724935e-06, + "loss": 0.7526, + "step": 4136 + }, + { + "epoch": 0.86, + "learning_rate": 3.6698909371113415e-06, + "loss": 0.8059, + "step": 4137 + }, + { + "epoch": 0.86, + "learning_rate": 3.668932344518013e-06, + "loss": 0.9817, + "step": 4138 + }, + { + "epoch": 0.86, + "learning_rate": 3.667973680047953e-06, + "loss": 0.8009, + "step": 4139 + }, + { + "epoch": 0.86, + "learning_rate": 3.667014943804168e-06, + "loss": 1.2015, + "step": 4140 + }, + { + "epoch": 0.86, + "learning_rate": 3.666056135889676e-06, + "loss": 0.9084, + "step": 4141 + }, + { + "epoch": 0.86, + "learning_rate": 3.6650972564074983e-06, + "loss": 1.1919, + "step": 4142 + }, + { + "epoch": 0.86, + "learning_rate": 3.664138305460669e-06, + "loss": 1.0359, + "step": 4143 + }, + { + "epoch": 0.86, + "learning_rate": 3.6631792831522252e-06, + "loss": 0.9713, + "step": 4144 + }, + { + "epoch": 0.86, + "learning_rate": 3.6622201895852156e-06, + "loss": 0.9381, + "step": 4145 + }, + { + "epoch": 0.86, + "learning_rate": 3.661261024862693e-06, + "loss": 0.9056, + "step": 4146 + }, + { + "epoch": 0.86, + "learning_rate": 3.660301789087722e-06, + "loss": 0.8577, + "step": 4147 + }, + { + "epoch": 0.86, + "learning_rate": 3.659342482363369e-06, + "loss": 0.8956, + "step": 4148 + }, + { + "epoch": 0.86, + "learning_rate": 3.6583831047927145e-06, + "loss": 0.9183, + "step": 4149 + }, + { + "epoch": 0.86, + "learning_rate": 3.6574236564788423e-06, + "loss": 0.8903, + "step": 4150 + }, + { + "epoch": 0.86, + "learning_rate": 3.6564641375248448e-06, + "loss": 0.7841, + "step": 4151 + }, + { + "epoch": 0.86, + "learning_rate": 3.655504548033822e-06, + "loss": 0.9013, + "step": 4152 + }, + { + "epoch": 0.86, + "learning_rate": 3.654544888108884e-06, + "loss": 0.6859, + "step": 4153 + }, + { + "epoch": 0.86, + "learning_rate": 3.6535851578531425e-06, + "loss": 1.1309, + "step": 4154 + }, + { + "epoch": 0.86, + "learning_rate": 3.6526253573697243e-06, + "loss": 0.8477, + "step": 4155 + }, + { + "epoch": 0.86, + "learning_rate": 3.6516654867617563e-06, + "loss": 0.7977, + "step": 4156 + }, + { + "epoch": 0.86, + "learning_rate": 3.65070554613238e-06, + "loss": 0.8256, + "step": 4157 + }, + { + "epoch": 0.86, + "learning_rate": 3.6497455355847374e-06, + "loss": 1.0216, + "step": 4158 + }, + { + "epoch": 0.87, + "learning_rate": 3.648785455221984e-06, + "loss": 0.9339, + "step": 4159 + }, + { + "epoch": 0.87, + "learning_rate": 3.64782530514728e-06, + "loss": 0.8966, + "step": 4160 + }, + { + "epoch": 0.87, + "learning_rate": 3.6468650854637925e-06, + "loss": 0.8709, + "step": 4161 + }, + { + "epoch": 0.87, + "learning_rate": 3.645904796274699e-06, + "loss": 0.9412, + "step": 4162 + }, + { + "epoch": 0.87, + "learning_rate": 3.64494443768318e-06, + "loss": 0.9297, + "step": 4163 + }, + { + "epoch": 0.87, + "learning_rate": 3.643984009792428e-06, + "loss": 0.8369, + "step": 4164 + }, + { + "epoch": 0.87, + "learning_rate": 3.6430235127056397e-06, + "loss": 0.9043, + "step": 4165 + }, + { + "epoch": 0.87, + "learning_rate": 3.6420629465260217e-06, + "loss": 0.7783, + "step": 4166 + }, + { + "epoch": 0.87, + "learning_rate": 3.641102311356785e-06, + "loss": 0.8405, + "step": 4167 + }, + { + "epoch": 0.87, + "learning_rate": 3.6401416073011506e-06, + "loss": 1.0366, + "step": 4168 + }, + { + "epoch": 0.87, + "learning_rate": 3.639180834462346e-06, + "loss": 0.8467, + "step": 4169 + }, + { + "epoch": 0.87, + "learning_rate": 3.6382199929436077e-06, + "loss": 0.8341, + "step": 4170 + }, + { + "epoch": 0.87, + "learning_rate": 3.637259082848175e-06, + "loss": 0.9806, + "step": 4171 + }, + { + "epoch": 0.87, + "learning_rate": 3.6362981042793008e-06, + "loss": 0.9171, + "step": 4172 + }, + { + "epoch": 0.87, + "learning_rate": 3.63533705734024e-06, + "loss": 0.847, + "step": 4173 + }, + { + "epoch": 0.87, + "learning_rate": 3.6343759421342578e-06, + "loss": 0.987, + "step": 4174 + }, + { + "epoch": 0.87, + "learning_rate": 3.6334147587646253e-06, + "loss": 0.9263, + "step": 4175 + }, + { + "epoch": 0.87, + "learning_rate": 3.6324535073346233e-06, + "loss": 1.1115, + "step": 4176 + }, + { + "epoch": 0.87, + "learning_rate": 3.6314921879475364e-06, + "loss": 0.9814, + "step": 4177 + }, + { + "epoch": 0.87, + "learning_rate": 3.630530800706659e-06, + "loss": 1.1112, + "step": 4178 + }, + { + "epoch": 0.87, + "learning_rate": 3.6295693457152922e-06, + "loss": 0.8, + "step": 4179 + }, + { + "epoch": 0.87, + "learning_rate": 3.6286078230767445e-06, + "loss": 0.8919, + "step": 4180 + }, + { + "epoch": 0.87, + "learning_rate": 3.6276462328943298e-06, + "loss": 1.0514, + "step": 4181 + }, + { + "epoch": 0.87, + "learning_rate": 3.6266845752713733e-06, + "loss": 1.1498, + "step": 4182 + }, + { + "epoch": 0.87, + "learning_rate": 3.6257228503112037e-06, + "loss": 0.9557, + "step": 4183 + }, + { + "epoch": 0.87, + "learning_rate": 3.624761058117158e-06, + "loss": 1.0406, + "step": 4184 + }, + { + "epoch": 0.87, + "learning_rate": 3.6237991987925816e-06, + "loss": 1.0133, + "step": 4185 + }, + { + "epoch": 0.87, + "learning_rate": 3.6228372724408264e-06, + "loss": 0.8782, + "step": 4186 + }, + { + "epoch": 0.87, + "learning_rate": 3.6218752791652496e-06, + "loss": 0.9218, + "step": 4187 + }, + { + "epoch": 0.87, + "learning_rate": 3.6209132190692197e-06, + "loss": 0.8872, + "step": 4188 + }, + { + "epoch": 0.87, + "learning_rate": 3.619951092256107e-06, + "loss": 1.013, + "step": 4189 + }, + { + "epoch": 0.87, + "learning_rate": 3.618988898829295e-06, + "loss": 1.0264, + "step": 4190 + }, + { + "epoch": 0.87, + "learning_rate": 3.6180266388921694e-06, + "loss": 0.7283, + "step": 4191 + }, + { + "epoch": 0.87, + "learning_rate": 3.6170643125481262e-06, + "loss": 1.0086, + "step": 4192 + }, + { + "epoch": 0.87, + "learning_rate": 3.616101919900566e-06, + "loss": 0.7666, + "step": 4193 + }, + { + "epoch": 0.87, + "learning_rate": 3.615139461052899e-06, + "loss": 0.9891, + "step": 4194 + }, + { + "epoch": 0.87, + "learning_rate": 3.61417693610854e-06, + "loss": 0.9806, + "step": 4195 + }, + { + "epoch": 0.87, + "learning_rate": 3.6132143451709137e-06, + "loss": 0.836, + "step": 4196 + }, + { + "epoch": 0.87, + "learning_rate": 3.612251688343449e-06, + "loss": 1.1176, + "step": 4197 + }, + { + "epoch": 0.87, + "learning_rate": 3.611288965729585e-06, + "loss": 0.9631, + "step": 4198 + }, + { + "epoch": 0.87, + "learning_rate": 3.6103261774327648e-06, + "loss": 0.8893, + "step": 4199 + }, + { + "epoch": 0.87, + "learning_rate": 3.6093633235564415e-06, + "loss": 1.0076, + "step": 4200 + }, + { + "epoch": 0.87, + "learning_rate": 3.6084004042040704e-06, + "loss": 0.9367, + "step": 4201 + }, + { + "epoch": 0.87, + "learning_rate": 3.607437419479121e-06, + "loss": 1.0653, + "step": 4202 + }, + { + "epoch": 0.87, + "learning_rate": 3.6064743694850634e-06, + "loss": 1.0415, + "step": 4203 + }, + { + "epoch": 0.87, + "learning_rate": 3.605511254325379e-06, + "loss": 0.8028, + "step": 4204 + }, + { + "epoch": 0.87, + "learning_rate": 3.604548074103552e-06, + "loss": 1.0535, + "step": 4205 + }, + { + "epoch": 0.87, + "learning_rate": 3.603584828923079e-06, + "loss": 1.0591, + "step": 4206 + }, + { + "epoch": 0.88, + "learning_rate": 3.6026215188874584e-06, + "loss": 0.9426, + "step": 4207 + }, + { + "epoch": 0.88, + "learning_rate": 3.601658144100199e-06, + "loss": 1.0444, + "step": 4208 + }, + { + "epoch": 0.88, + "learning_rate": 3.600694704664815e-06, + "loss": 0.9884, + "step": 4209 + }, + { + "epoch": 0.88, + "learning_rate": 3.599731200684828e-06, + "loss": 1.0701, + "step": 4210 + }, + { + "epoch": 0.88, + "learning_rate": 3.5987676322637653e-06, + "loss": 0.9111, + "step": 4211 + }, + { + "epoch": 0.88, + "learning_rate": 3.5978039995051644e-06, + "loss": 1.1379, + "step": 4212 + }, + { + "epoch": 0.88, + "learning_rate": 3.5968403025125654e-06, + "loss": 0.9609, + "step": 4213 + }, + { + "epoch": 0.88, + "learning_rate": 3.5958765413895196e-06, + "loss": 0.8224, + "step": 4214 + }, + { + "epoch": 0.88, + "learning_rate": 3.5949127162395804e-06, + "loss": 0.7783, + "step": 4215 + }, + { + "epoch": 0.88, + "learning_rate": 3.5939488271663143e-06, + "loss": 0.8531, + "step": 4216 + }, + { + "epoch": 0.88, + "learning_rate": 3.5929848742732877e-06, + "loss": 0.9436, + "step": 4217 + }, + { + "epoch": 0.88, + "learning_rate": 3.5920208576640787e-06, + "loss": 0.9011, + "step": 4218 + }, + { + "epoch": 0.88, + "learning_rate": 3.5910567774422715e-06, + "loss": 0.9087, + "step": 4219 + }, + { + "epoch": 0.88, + "learning_rate": 3.590092633711455e-06, + "loss": 1.1456, + "step": 4220 + }, + { + "epoch": 0.88, + "learning_rate": 3.5891284265752273e-06, + "loss": 0.9713, + "step": 4221 + }, + { + "epoch": 0.88, + "learning_rate": 3.588164156137193e-06, + "loss": 1.1566, + "step": 4222 + }, + { + "epoch": 0.88, + "learning_rate": 3.5871998225009613e-06, + "loss": 0.9939, + "step": 4223 + }, + { + "epoch": 0.88, + "learning_rate": 3.586235425770151e-06, + "loss": 0.9316, + "step": 4224 + }, + { + "epoch": 0.88, + "learning_rate": 3.585270966048385e-06, + "loss": 0.9518, + "step": 4225 + }, + { + "epoch": 0.88, + "learning_rate": 3.5843064434392958e-06, + "loss": 0.9397, + "step": 4226 + }, + { + "epoch": 0.88, + "learning_rate": 3.583341858046522e-06, + "loss": 1.0128, + "step": 4227 + }, + { + "epoch": 0.88, + "learning_rate": 3.582377209973706e-06, + "loss": 0.862, + "step": 4228 + }, + { + "epoch": 0.88, + "learning_rate": 3.5814124993245016e-06, + "loss": 1.0805, + "step": 4229 + }, + { + "epoch": 0.88, + "learning_rate": 3.5804477262025643e-06, + "loss": 0.9244, + "step": 4230 + }, + { + "epoch": 0.88, + "learning_rate": 3.579482890711561e-06, + "loss": 0.7953, + "step": 4231 + }, + { + "epoch": 0.88, + "learning_rate": 3.578517992955162e-06, + "loss": 0.8448, + "step": 4232 + }, + { + "epoch": 0.88, + "learning_rate": 3.577553033037047e-06, + "loss": 0.9229, + "step": 4233 + }, + { + "epoch": 0.88, + "learning_rate": 3.5765880110608983e-06, + "loss": 0.9596, + "step": 4234 + }, + { + "epoch": 0.88, + "learning_rate": 3.57562292713041e-06, + "loss": 0.833, + "step": 4235 + }, + { + "epoch": 0.88, + "learning_rate": 3.5746577813492786e-06, + "loss": 0.7614, + "step": 4236 + }, + { + "epoch": 0.88, + "learning_rate": 3.57369257382121e-06, + "loss": 0.8277, + "step": 4237 + }, + { + "epoch": 0.88, + "learning_rate": 3.5727273046499153e-06, + "loss": 1.0229, + "step": 4238 + }, + { + "epoch": 0.88, + "learning_rate": 3.571761973939113e-06, + "loss": 1.1311, + "step": 4239 + }, + { + "epoch": 0.88, + "learning_rate": 3.5707965817925268e-06, + "loss": 0.9243, + "step": 4240 + }, + { + "epoch": 0.88, + "learning_rate": 3.569831128313889e-06, + "loss": 0.766, + "step": 4241 + }, + { + "epoch": 0.88, + "learning_rate": 3.5688656136069363e-06, + "loss": 0.9055, + "step": 4242 + }, + { + "epoch": 0.88, + "learning_rate": 3.567900037775415e-06, + "loss": 0.9399, + "step": 4243 + }, + { + "epoch": 0.88, + "learning_rate": 3.5669344009230744e-06, + "loss": 0.9336, + "step": 4244 + }, + { + "epoch": 0.88, + "learning_rate": 3.5659687031536737e-06, + "loss": 0.8251, + "step": 4245 + }, + { + "epoch": 0.88, + "learning_rate": 3.5650029445709754e-06, + "loss": 0.7857, + "step": 4246 + }, + { + "epoch": 0.88, + "learning_rate": 3.5640371252787518e-06, + "loss": 0.9072, + "step": 4247 + }, + { + "epoch": 0.88, + "learning_rate": 3.563071245380778e-06, + "loss": 0.9087, + "step": 4248 + }, + { + "epoch": 0.88, + "learning_rate": 3.5621053049808408e-06, + "loss": 1.0383, + "step": 4249 + }, + { + "epoch": 0.88, + "learning_rate": 3.5611393041827274e-06, + "loss": 0.8522, + "step": 4250 + }, + { + "epoch": 0.88, + "learning_rate": 3.5601732430902372e-06, + "loss": 0.9792, + "step": 4251 + }, + { + "epoch": 0.88, + "learning_rate": 3.5592071218071704e-06, + "loss": 1.1862, + "step": 4252 + }, + { + "epoch": 0.88, + "learning_rate": 3.5582409404373396e-06, + "loss": 0.7801, + "step": 4253 + }, + { + "epoch": 0.88, + "learning_rate": 3.5572746990845585e-06, + "loss": 1.1832, + "step": 4254 + }, + { + "epoch": 0.88, + "learning_rate": 3.5563083978526513e-06, + "loss": 0.7778, + "step": 4255 + }, + { + "epoch": 0.89, + "learning_rate": 3.555342036845446e-06, + "loss": 0.994, + "step": 4256 + }, + { + "epoch": 0.89, + "learning_rate": 3.554375616166779e-06, + "loss": 1.1453, + "step": 4257 + }, + { + "epoch": 0.89, + "learning_rate": 3.5534091359204914e-06, + "loss": 0.8163, + "step": 4258 + }, + { + "epoch": 0.89, + "learning_rate": 3.5524425962104322e-06, + "loss": 0.9811, + "step": 4259 + }, + { + "epoch": 0.89, + "learning_rate": 3.551475997140454e-06, + "loss": 0.8216, + "step": 4260 + }, + { + "epoch": 0.89, + "learning_rate": 3.5505093388144204e-06, + "loss": 0.9545, + "step": 4261 + }, + { + "epoch": 0.89, + "learning_rate": 3.549542621336197e-06, + "loss": 1.1106, + "step": 4262 + }, + { + "epoch": 0.89, + "learning_rate": 3.548575844809659e-06, + "loss": 0.9955, + "step": 4263 + }, + { + "epoch": 0.89, + "learning_rate": 3.5476090093386854e-06, + "loss": 0.9691, + "step": 4264 + }, + { + "epoch": 0.89, + "learning_rate": 3.5466421150271627e-06, + "loss": 1.0153, + "step": 4265 + }, + { + "epoch": 0.89, + "learning_rate": 3.5456751619789835e-06, + "loss": 0.8249, + "step": 4266 + }, + { + "epoch": 0.89, + "learning_rate": 3.544708150298047e-06, + "loss": 0.8161, + "step": 4267 + }, + { + "epoch": 0.89, + "learning_rate": 3.5437410800882587e-06, + "loss": 0.7219, + "step": 4268 + }, + { + "epoch": 0.89, + "learning_rate": 3.5427739514535305e-06, + "loss": 0.9553, + "step": 4269 + }, + { + "epoch": 0.89, + "learning_rate": 3.5418067644977798e-06, + "loss": 0.9718, + "step": 4270 + }, + { + "epoch": 0.89, + "learning_rate": 3.540839519324931e-06, + "loss": 0.9072, + "step": 4271 + }, + { + "epoch": 0.89, + "learning_rate": 3.539872216038914e-06, + "loss": 0.9234, + "step": 4272 + }, + { + "epoch": 0.89, + "learning_rate": 3.5389048547436663e-06, + "loss": 1.3678, + "step": 4273 + }, + { + "epoch": 0.89, + "learning_rate": 3.5379374355431302e-06, + "loss": 0.8501, + "step": 4274 + }, + { + "epoch": 0.89, + "learning_rate": 3.5369699585412556e-06, + "loss": 0.9107, + "step": 4275 + }, + { + "epoch": 0.89, + "learning_rate": 3.536002423841996e-06, + "loss": 0.9554, + "step": 4276 + }, + { + "epoch": 0.89, + "learning_rate": 3.535034831549316e-06, + "loss": 0.9027, + "step": 4277 + }, + { + "epoch": 0.89, + "learning_rate": 3.53406718176718e-06, + "loss": 0.7721, + "step": 4278 + }, + { + "epoch": 0.89, + "learning_rate": 3.5330994745995643e-06, + "loss": 1.1429, + "step": 4279 + }, + { + "epoch": 0.89, + "learning_rate": 3.5321317101504475e-06, + "loss": 0.8344, + "step": 4280 + }, + { + "epoch": 0.89, + "learning_rate": 3.531163888523817e-06, + "loss": 1.0361, + "step": 4281 + }, + { + "epoch": 0.89, + "learning_rate": 3.5301960098236645e-06, + "loss": 0.867, + "step": 4282 + }, + { + "epoch": 0.89, + "learning_rate": 3.529228074153988e-06, + "loss": 1.0368, + "step": 4283 + }, + { + "epoch": 0.89, + "learning_rate": 3.528260081618793e-06, + "loss": 0.7578, + "step": 4284 + }, + { + "epoch": 0.89, + "learning_rate": 3.5272920323220906e-06, + "loss": 1.0517, + "step": 4285 + }, + { + "epoch": 0.89, + "learning_rate": 3.526323926367896e-06, + "loss": 0.9647, + "step": 4286 + }, + { + "epoch": 0.89, + "learning_rate": 3.5253557638602334e-06, + "loss": 0.9328, + "step": 4287 + }, + { + "epoch": 0.89, + "learning_rate": 3.524387544903131e-06, + "loss": 0.901, + "step": 4288 + }, + { + "epoch": 0.89, + "learning_rate": 3.5234192696006255e-06, + "loss": 1.1862, + "step": 4289 + }, + { + "epoch": 0.89, + "learning_rate": 3.5224509380567557e-06, + "loss": 1.2642, + "step": 4290 + }, + { + "epoch": 0.89, + "learning_rate": 3.5214825503755707e-06, + "loss": 0.9363, + "step": 4291 + }, + { + "epoch": 0.89, + "learning_rate": 3.520514106661122e-06, + "loss": 0.9245, + "step": 4292 + }, + { + "epoch": 0.89, + "learning_rate": 3.51954560701747e-06, + "loss": 0.8686, + "step": 4293 + }, + { + "epoch": 0.89, + "learning_rate": 3.5185770515486795e-06, + "loss": 0.8547, + "step": 4294 + }, + { + "epoch": 0.89, + "learning_rate": 3.5176084403588223e-06, + "loss": 1.0648, + "step": 4295 + }, + { + "epoch": 0.89, + "learning_rate": 3.516639773551975e-06, + "loss": 0.7917, + "step": 4296 + }, + { + "epoch": 0.89, + "learning_rate": 3.51567105123222e-06, + "loss": 0.8984, + "step": 4297 + }, + { + "epoch": 0.89, + "learning_rate": 3.514702273503649e-06, + "loss": 0.8965, + "step": 4298 + }, + { + "epoch": 0.89, + "learning_rate": 3.513733440470354e-06, + "loss": 1.1048, + "step": 4299 + }, + { + "epoch": 0.89, + "learning_rate": 3.5127645522364385e-06, + "loss": 0.7478, + "step": 4300 + }, + { + "epoch": 0.89, + "learning_rate": 3.5117956089060078e-06, + "loss": 1.0793, + "step": 4301 + }, + { + "epoch": 0.89, + "learning_rate": 3.510826610583176e-06, + "loss": 0.8547, + "step": 4302 + }, + { + "epoch": 0.89, + "learning_rate": 3.5098575573720602e-06, + "loss": 0.8671, + "step": 4303 + }, + { + "epoch": 0.9, + "learning_rate": 3.508888449376788e-06, + "loss": 0.8056, + "step": 4304 + }, + { + "epoch": 0.9, + "learning_rate": 3.5079192867014866e-06, + "loss": 0.8318, + "step": 4305 + }, + { + "epoch": 0.9, + "learning_rate": 3.5069500694502955e-06, + "loss": 0.9719, + "step": 4306 + }, + { + "epoch": 0.9, + "learning_rate": 3.5059807977273545e-06, + "loss": 0.9243, + "step": 4307 + }, + { + "epoch": 0.9, + "learning_rate": 3.5050114716368135e-06, + "loss": 0.9752, + "step": 4308 + }, + { + "epoch": 0.9, + "learning_rate": 3.504042091282825e-06, + "loss": 0.9234, + "step": 4309 + }, + { + "epoch": 0.9, + "learning_rate": 3.503072656769551e-06, + "loss": 0.9024, + "step": 4310 + }, + { + "epoch": 0.9, + "learning_rate": 3.502103168201155e-06, + "loss": 0.8985, + "step": 4311 + }, + { + "epoch": 0.9, + "learning_rate": 3.50113362568181e-06, + "loss": 0.7889, + "step": 4312 + }, + { + "epoch": 0.9, + "learning_rate": 3.5001640293156924e-06, + "loss": 0.8111, + "step": 4313 + }, + { + "epoch": 0.9, + "learning_rate": 3.499194379206986e-06, + "loss": 0.7634, + "step": 4314 + }, + { + "epoch": 0.9, + "learning_rate": 3.498224675459878e-06, + "loss": 0.8502, + "step": 4315 + }, + { + "epoch": 0.9, + "learning_rate": 3.4972549181785652e-06, + "loss": 0.8013, + "step": 4316 + }, + { + "epoch": 0.9, + "learning_rate": 3.4962851074672464e-06, + "loss": 0.9301, + "step": 4317 + }, + { + "epoch": 0.9, + "learning_rate": 3.495315243430129e-06, + "loss": 0.9348, + "step": 4318 + }, + { + "epoch": 0.9, + "learning_rate": 3.494345326171423e-06, + "loss": 0.934, + "step": 4319 + }, + { + "epoch": 0.9, + "learning_rate": 3.493375355795348e-06, + "loss": 0.9167, + "step": 4320 + }, + { + "epoch": 0.9, + "learning_rate": 3.4924053324061253e-06, + "loss": 0.9365, + "step": 4321 + }, + { + "epoch": 0.9, + "learning_rate": 3.491435256107986e-06, + "loss": 0.8678, + "step": 4322 + }, + { + "epoch": 0.9, + "learning_rate": 3.4904651270051623e-06, + "loss": 1.2068, + "step": 4323 + }, + { + "epoch": 0.9, + "learning_rate": 3.4894949452018962e-06, + "loss": 0.9386, + "step": 4324 + }, + { + "epoch": 0.9, + "learning_rate": 3.4885247108024326e-06, + "loss": 0.7178, + "step": 4325 + }, + { + "epoch": 0.9, + "learning_rate": 3.487554423911025e-06, + "loss": 0.9339, + "step": 4326 + }, + { + "epoch": 0.9, + "learning_rate": 3.4865840846319277e-06, + "loss": 0.856, + "step": 4327 + }, + { + "epoch": 0.9, + "learning_rate": 3.4856136930694065e-06, + "loss": 0.9913, + "step": 4328 + }, + { + "epoch": 0.9, + "learning_rate": 3.4846432493277278e-06, + "loss": 0.8755, + "step": 4329 + }, + { + "epoch": 0.9, + "learning_rate": 3.483672753511167e-06, + "loss": 0.9936, + "step": 4330 + }, + { + "epoch": 0.9, + "learning_rate": 3.482702205724003e-06, + "loss": 0.971, + "step": 4331 + }, + { + "epoch": 0.9, + "learning_rate": 3.4817316060705216e-06, + "loss": 0.9336, + "step": 4332 + }, + { + "epoch": 0.9, + "learning_rate": 3.4807609546550138e-06, + "loss": 0.8179, + "step": 4333 + }, + { + "epoch": 0.9, + "learning_rate": 3.4797902515817757e-06, + "loss": 0.9706, + "step": 4334 + }, + { + "epoch": 0.9, + "learning_rate": 3.478819496955109e-06, + "loss": 0.8726, + "step": 4335 + }, + { + "epoch": 0.9, + "learning_rate": 3.477848690879322e-06, + "loss": 0.8802, + "step": 4336 + }, + { + "epoch": 0.9, + "learning_rate": 3.4768778334587264e-06, + "loss": 1.0507, + "step": 4337 + }, + { + "epoch": 0.9, + "learning_rate": 3.475906924797642e-06, + "loss": 0.8361, + "step": 4338 + }, + { + "epoch": 0.9, + "learning_rate": 3.474935965000393e-06, + "loss": 0.7838, + "step": 4339 + }, + { + "epoch": 0.9, + "learning_rate": 3.473964954171308e-06, + "loss": 1.1069, + "step": 4340 + }, + { + "epoch": 0.9, + "learning_rate": 3.4729938924147225e-06, + "loss": 0.9552, + "step": 4341 + }, + { + "epoch": 0.9, + "learning_rate": 3.4720227798349772e-06, + "loss": 0.815, + "step": 4342 + }, + { + "epoch": 0.9, + "learning_rate": 3.4710516165364174e-06, + "loss": 1.0446, + "step": 4343 + }, + { + "epoch": 0.9, + "learning_rate": 3.4700804026233962e-06, + "loss": 1.0412, + "step": 4344 + }, + { + "epoch": 0.9, + "learning_rate": 3.469109138200268e-06, + "loss": 0.9222, + "step": 4345 + }, + { + "epoch": 0.9, + "learning_rate": 3.468137823371397e-06, + "loss": 0.9518, + "step": 4346 + }, + { + "epoch": 0.9, + "learning_rate": 3.4671664582411495e-06, + "loss": 1.0171, + "step": 4347 + }, + { + "epoch": 0.9, + "learning_rate": 3.4661950429139002e-06, + "loss": 0.7906, + "step": 4348 + }, + { + "epoch": 0.9, + "learning_rate": 3.4652235774940266e-06, + "loss": 0.9499, + "step": 4349 + }, + { + "epoch": 0.9, + "learning_rate": 3.4642520620859133e-06, + "loss": 0.9546, + "step": 4350 + }, + { + "epoch": 0.9, + "learning_rate": 3.463280496793948e-06, + "loss": 0.8993, + "step": 4351 + }, + { + "epoch": 0.91, + "learning_rate": 3.4623088817225274e-06, + "loss": 1.0976, + "step": 4352 + }, + { + "epoch": 0.91, + "learning_rate": 3.4613372169760497e-06, + "loss": 1.0296, + "step": 4353 + }, + { + "epoch": 0.91, + "learning_rate": 3.4603655026589217e-06, + "loss": 1.0342, + "step": 4354 + }, + { + "epoch": 0.91, + "learning_rate": 3.4593937388755524e-06, + "loss": 1.1103, + "step": 4355 + }, + { + "epoch": 0.91, + "learning_rate": 3.4584219257303597e-06, + "loss": 0.8928, + "step": 4356 + }, + { + "epoch": 0.91, + "learning_rate": 3.457450063327763e-06, + "loss": 0.7671, + "step": 4357 + }, + { + "epoch": 0.91, + "learning_rate": 3.4564781517721903e-06, + "loss": 0.9408, + "step": 4358 + }, + { + "epoch": 0.91, + "learning_rate": 3.4555061911680716e-06, + "loss": 1.1083, + "step": 4359 + }, + { + "epoch": 0.91, + "learning_rate": 3.454534181619846e-06, + "loss": 0.8458, + "step": 4360 + }, + { + "epoch": 0.91, + "learning_rate": 3.4535621232319547e-06, + "loss": 0.8631, + "step": 4361 + }, + { + "epoch": 0.91, + "learning_rate": 3.4525900161088463e-06, + "loss": 1.0551, + "step": 4362 + }, + { + "epoch": 0.91, + "learning_rate": 3.451617860354972e-06, + "loss": 1.0771, + "step": 4363 + }, + { + "epoch": 0.91, + "learning_rate": 3.450645656074792e-06, + "loss": 1.0237, + "step": 4364 + }, + { + "epoch": 0.91, + "learning_rate": 3.4496734033727665e-06, + "loss": 0.8029, + "step": 4365 + }, + { + "epoch": 0.91, + "learning_rate": 3.448701102353368e-06, + "loss": 0.7932, + "step": 4366 + }, + { + "epoch": 0.91, + "learning_rate": 3.447728753121067e-06, + "loss": 0.9814, + "step": 4367 + }, + { + "epoch": 0.91, + "learning_rate": 3.446756355780344e-06, + "loss": 0.9383, + "step": 4368 + }, + { + "epoch": 0.91, + "learning_rate": 3.4457839104356815e-06, + "loss": 0.8339, + "step": 4369 + }, + { + "epoch": 0.91, + "learning_rate": 3.4448114171915707e-06, + "loss": 0.9622, + "step": 4370 + }, + { + "epoch": 0.91, + "learning_rate": 3.4438388761525036e-06, + "loss": 1.0681, + "step": 4371 + }, + { + "epoch": 0.91, + "learning_rate": 3.442866287422982e-06, + "loss": 1.1735, + "step": 4372 + }, + { + "epoch": 0.91, + "learning_rate": 3.4418936511075088e-06, + "loss": 0.7363, + "step": 4373 + }, + { + "epoch": 0.91, + "learning_rate": 3.4409209673105954e-06, + "loss": 0.849, + "step": 4374 + }, + { + "epoch": 0.91, + "learning_rate": 3.439948236136754e-06, + "loss": 0.9085, + "step": 4375 + }, + { + "epoch": 0.91, + "learning_rate": 3.438975457690507e-06, + "loss": 1.0114, + "step": 4376 + }, + { + "epoch": 0.91, + "learning_rate": 3.438002632076378e-06, + "loss": 1.0775, + "step": 4377 + }, + { + "epoch": 0.91, + "learning_rate": 3.437029759398898e-06, + "loss": 0.921, + "step": 4378 + }, + { + "epoch": 0.91, + "learning_rate": 3.4360568397626005e-06, + "loss": 0.845, + "step": 4379 + }, + { + "epoch": 0.91, + "learning_rate": 3.435083873272028e-06, + "loss": 0.8634, + "step": 4380 + }, + { + "epoch": 0.91, + "learning_rate": 3.4341108600317234e-06, + "loss": 0.8485, + "step": 4381 + }, + { + "epoch": 0.91, + "learning_rate": 3.4331378001462382e-06, + "loss": 1.0521, + "step": 4382 + }, + { + "epoch": 0.91, + "learning_rate": 3.4321646937201276e-06, + "loss": 1.1715, + "step": 4383 + }, + { + "epoch": 0.91, + "learning_rate": 3.431191540857951e-06, + "loss": 0.9237, + "step": 4384 + }, + { + "epoch": 0.91, + "learning_rate": 3.430218341664274e-06, + "loss": 0.9567, + "step": 4385 + }, + { + "epoch": 0.91, + "learning_rate": 3.4292450962436675e-06, + "loss": 0.9474, + "step": 4386 + }, + { + "epoch": 0.91, + "learning_rate": 3.4282718047007047e-06, + "loss": 1.0085, + "step": 4387 + }, + { + "epoch": 0.91, + "learning_rate": 3.4272984671399686e-06, + "loss": 0.97, + "step": 4388 + }, + { + "epoch": 0.91, + "learning_rate": 3.426325083666042e-06, + "loss": 0.9688, + "step": 4389 + }, + { + "epoch": 0.91, + "learning_rate": 3.4253516543835158e-06, + "loss": 0.9195, + "step": 4390 + }, + { + "epoch": 0.91, + "learning_rate": 3.4243781793969848e-06, + "loss": 0.8541, + "step": 4391 + }, + { + "epoch": 0.91, + "learning_rate": 3.4234046588110483e-06, + "loss": 0.8813, + "step": 4392 + }, + { + "epoch": 0.91, + "learning_rate": 3.422431092730312e-06, + "loss": 0.849, + "step": 4393 + }, + { + "epoch": 0.91, + "learning_rate": 3.421457481259385e-06, + "loss": 0.776, + "step": 4394 + }, + { + "epoch": 0.91, + "learning_rate": 3.420483824502881e-06, + "loss": 0.8508, + "step": 4395 + }, + { + "epoch": 0.91, + "learning_rate": 3.4195101225654196e-06, + "loss": 0.9736, + "step": 4396 + }, + { + "epoch": 0.91, + "learning_rate": 3.4185363755516267e-06, + "loss": 0.907, + "step": 4397 + }, + { + "epoch": 0.91, + "learning_rate": 3.4175625835661293e-06, + "loss": 0.8645, + "step": 4398 + }, + { + "epoch": 0.91, + "learning_rate": 3.4165887467135627e-06, + "loss": 0.9434, + "step": 4399 + }, + { + "epoch": 0.92, + "learning_rate": 3.4156148650985646e-06, + "loss": 1.1672, + "step": 4400 + }, + { + "epoch": 0.92, + "learning_rate": 3.414640938825779e-06, + "loss": 1.0569, + "step": 4401 + }, + { + "epoch": 0.92, + "learning_rate": 3.413666967999854e-06, + "loss": 0.7587, + "step": 4402 + }, + { + "epoch": 0.92, + "learning_rate": 3.4126929527254433e-06, + "loss": 0.911, + "step": 4403 + }, + { + "epoch": 0.92, + "learning_rate": 3.4117188931072036e-06, + "loss": 1.1367, + "step": 4404 + }, + { + "epoch": 0.92, + "learning_rate": 3.4107447892497984e-06, + "loss": 0.8833, + "step": 4405 + }, + { + "epoch": 0.92, + "learning_rate": 3.409770641257895e-06, + "loss": 0.7307, + "step": 4406 + }, + { + "epoch": 0.92, + "learning_rate": 3.4087964492361655e-06, + "loss": 0.979, + "step": 4407 + }, + { + "epoch": 0.92, + "learning_rate": 3.4078222132892865e-06, + "loss": 1.0441, + "step": 4408 + }, + { + "epoch": 0.92, + "learning_rate": 3.4068479335219403e-06, + "loss": 0.8838, + "step": 4409 + }, + { + "epoch": 0.92, + "learning_rate": 3.4058736100388115e-06, + "loss": 1.0993, + "step": 4410 + }, + { + "epoch": 0.92, + "learning_rate": 3.4048992429445936e-06, + "loss": 0.8351, + "step": 4411 + }, + { + "epoch": 0.92, + "learning_rate": 3.40392483234398e-06, + "loss": 0.9206, + "step": 4412 + }, + { + "epoch": 0.92, + "learning_rate": 3.402950378341673e-06, + "loss": 0.8863, + "step": 4413 + }, + { + "epoch": 0.92, + "learning_rate": 3.4019758810423752e-06, + "loss": 1.0478, + "step": 4414 + }, + { + "epoch": 0.92, + "learning_rate": 3.4010013405507988e-06, + "loss": 0.8681, + "step": 4415 + }, + { + "epoch": 0.92, + "learning_rate": 3.400026756971657e-06, + "loss": 0.8088, + "step": 4416 + }, + { + "epoch": 0.92, + "learning_rate": 3.3990521304096683e-06, + "loss": 0.975, + "step": 4417 + }, + { + "epoch": 0.92, + "learning_rate": 3.3980774609695564e-06, + "loss": 0.8362, + "step": 4418 + }, + { + "epoch": 0.92, + "learning_rate": 3.3971027487560505e-06, + "loss": 0.9554, + "step": 4419 + }, + { + "epoch": 0.92, + "learning_rate": 3.3961279938738815e-06, + "loss": 0.8413, + "step": 4420 + }, + { + "epoch": 0.92, + "learning_rate": 3.3951531964277887e-06, + "loss": 1.2945, + "step": 4421 + }, + { + "epoch": 0.92, + "learning_rate": 3.3941783565225125e-06, + "loss": 1.0559, + "step": 4422 + }, + { + "epoch": 0.92, + "learning_rate": 3.393203474262801e-06, + "loss": 0.8369, + "step": 4423 + }, + { + "epoch": 0.92, + "learning_rate": 3.3922285497534036e-06, + "loss": 1.0012, + "step": 4424 + }, + { + "epoch": 0.92, + "learning_rate": 3.391253583099076e-06, + "loss": 1.0519, + "step": 4425 + }, + { + "epoch": 0.92, + "learning_rate": 3.3902785744045797e-06, + "loss": 1.0642, + "step": 4426 + }, + { + "epoch": 0.92, + "learning_rate": 3.3893035237746778e-06, + "loss": 1.0671, + "step": 4427 + }, + { + "epoch": 0.92, + "learning_rate": 3.38832843131414e-06, + "loss": 0.9359, + "step": 4428 + }, + { + "epoch": 0.92, + "learning_rate": 3.3873532971277405e-06, + "loss": 0.9278, + "step": 4429 + }, + { + "epoch": 0.92, + "learning_rate": 3.386378121320257e-06, + "loss": 1.0131, + "step": 4430 + }, + { + "epoch": 0.92, + "learning_rate": 3.3854029039964714e-06, + "loss": 0.7947, + "step": 4431 + }, + { + "epoch": 0.92, + "learning_rate": 3.384427645261172e-06, + "loss": 0.9108, + "step": 4432 + }, + { + "epoch": 0.92, + "learning_rate": 3.3834523452191483e-06, + "loss": 1.0395, + "step": 4433 + }, + { + "epoch": 0.92, + "learning_rate": 3.3824770039751987e-06, + "loss": 0.9562, + "step": 4434 + }, + { + "epoch": 0.92, + "learning_rate": 3.381501621634122e-06, + "loss": 0.9206, + "step": 4435 + }, + { + "epoch": 0.92, + "learning_rate": 3.380526198300723e-06, + "loss": 0.7484, + "step": 4436 + }, + { + "epoch": 0.92, + "learning_rate": 3.379550734079811e-06, + "loss": 0.8488, + "step": 4437 + }, + { + "epoch": 0.92, + "learning_rate": 3.3785752290762003e-06, + "loss": 0.9386, + "step": 4438 + }, + { + "epoch": 0.92, + "learning_rate": 3.377599683394707e-06, + "loss": 0.8523, + "step": 4439 + }, + { + "epoch": 0.92, + "learning_rate": 3.3766240971401565e-06, + "loss": 1.1292, + "step": 4440 + }, + { + "epoch": 0.92, + "learning_rate": 3.375648470417372e-06, + "loss": 1.0143, + "step": 4441 + }, + { + "epoch": 0.92, + "learning_rate": 3.374672803331187e-06, + "loss": 0.86, + "step": 4442 + }, + { + "epoch": 0.92, + "learning_rate": 3.373697095986436e-06, + "loss": 0.8848, + "step": 4443 + }, + { + "epoch": 0.92, + "learning_rate": 3.372721348487959e-06, + "loss": 0.9335, + "step": 4444 + }, + { + "epoch": 0.92, + "learning_rate": 3.3717455609405987e-06, + "loss": 1.0374, + "step": 4445 + }, + { + "epoch": 0.92, + "learning_rate": 3.3707697334492055e-06, + "loss": 1.0602, + "step": 4446 + }, + { + "epoch": 0.92, + "learning_rate": 3.36979386611863e-06, + "loss": 0.9581, + "step": 4447 + }, + { + "epoch": 0.93, + "learning_rate": 3.368817959053731e-06, + "loss": 0.8601, + "step": 4448 + }, + { + "epoch": 0.93, + "learning_rate": 3.367842012359367e-06, + "loss": 1.0682, + "step": 4449 + }, + { + "epoch": 0.93, + "learning_rate": 3.366866026140407e-06, + "loss": 0.9576, + "step": 4450 + }, + { + "epoch": 0.93, + "learning_rate": 3.365890000501717e-06, + "loss": 0.8445, + "step": 4451 + }, + { + "epoch": 0.93, + "learning_rate": 3.3649139355481735e-06, + "loss": 0.8509, + "step": 4452 + }, + { + "epoch": 0.93, + "learning_rate": 3.363937831384653e-06, + "loss": 0.8749, + "step": 4453 + }, + { + "epoch": 0.93, + "learning_rate": 3.3629616881160395e-06, + "loss": 0.8143, + "step": 4454 + }, + { + "epoch": 0.93, + "learning_rate": 3.3619855058472176e-06, + "loss": 0.9471, + "step": 4455 + }, + { + "epoch": 0.93, + "learning_rate": 3.3610092846830794e-06, + "loss": 0.8725, + "step": 4456 + }, + { + "epoch": 0.93, + "learning_rate": 3.360033024728519e-06, + "loss": 1.1142, + "step": 4457 + }, + { + "epoch": 0.93, + "learning_rate": 3.359056726088436e-06, + "loss": 0.9696, + "step": 4458 + }, + { + "epoch": 0.93, + "learning_rate": 3.3580803888677333e-06, + "loss": 1.1277, + "step": 4459 + }, + { + "epoch": 0.93, + "learning_rate": 3.357104013171319e-06, + "loss": 0.9571, + "step": 4460 + }, + { + "epoch": 0.93, + "learning_rate": 3.3561275991041023e-06, + "loss": 1.0401, + "step": 4461 + }, + { + "epoch": 0.93, + "learning_rate": 3.355151146771002e-06, + "loss": 0.8539, + "step": 4462 + }, + { + "epoch": 0.93, + "learning_rate": 3.3541746562769357e-06, + "loss": 0.7241, + "step": 4463 + }, + { + "epoch": 0.93, + "learning_rate": 3.353198127726828e-06, + "loss": 0.9206, + "step": 4464 + }, + { + "epoch": 0.93, + "learning_rate": 3.352221561225607e-06, + "loss": 1.1001, + "step": 4465 + }, + { + "epoch": 0.93, + "learning_rate": 3.3512449568782044e-06, + "loss": 1.0632, + "step": 4466 + }, + { + "epoch": 0.93, + "learning_rate": 3.3502683147895554e-06, + "loss": 0.8763, + "step": 4467 + }, + { + "epoch": 0.93, + "learning_rate": 3.349291635064602e-06, + "loss": 1.1072, + "step": 4468 + }, + { + "epoch": 0.93, + "learning_rate": 3.3483149178082863e-06, + "loss": 0.8463, + "step": 4469 + }, + { + "epoch": 0.93, + "learning_rate": 3.347338163125558e-06, + "loss": 1.061, + "step": 4470 + }, + { + "epoch": 0.93, + "learning_rate": 3.346361371121369e-06, + "loss": 0.7292, + "step": 4471 + }, + { + "epoch": 0.93, + "learning_rate": 3.3453845419006755e-06, + "loss": 0.9062, + "step": 4472 + }, + { + "epoch": 0.93, + "learning_rate": 3.344407675568437e-06, + "loss": 0.9437, + "step": 4473 + }, + { + "epoch": 0.93, + "learning_rate": 3.3434307722296195e-06, + "loss": 1.0264, + "step": 4474 + }, + { + "epoch": 0.93, + "learning_rate": 3.3424538319891885e-06, + "loss": 0.828, + "step": 4475 + }, + { + "epoch": 0.93, + "learning_rate": 3.341476854952119e-06, + "loss": 0.7326, + "step": 4476 + }, + { + "epoch": 0.93, + "learning_rate": 3.3404998412233852e-06, + "loss": 0.8716, + "step": 4477 + }, + { + "epoch": 0.93, + "learning_rate": 3.3395227909079683e-06, + "loss": 0.9095, + "step": 4478 + }, + { + "epoch": 0.93, + "learning_rate": 3.338545704110851e-06, + "loss": 1.0631, + "step": 4479 + }, + { + "epoch": 0.93, + "learning_rate": 3.337568580937022e-06, + "loss": 0.8915, + "step": 4480 + }, + { + "epoch": 0.93, + "learning_rate": 3.3365914214914736e-06, + "loss": 1.0079, + "step": 4481 + }, + { + "epoch": 0.93, + "learning_rate": 3.3356142258792013e-06, + "loss": 0.9936, + "step": 4482 + }, + { + "epoch": 0.93, + "learning_rate": 3.334636994205203e-06, + "loss": 0.8889, + "step": 4483 + }, + { + "epoch": 0.93, + "learning_rate": 3.3336597265744846e-06, + "loss": 1.1444, + "step": 4484 + }, + { + "epoch": 0.93, + "learning_rate": 3.3326824230920515e-06, + "loss": 1.206, + "step": 4485 + }, + { + "epoch": 0.93, + "learning_rate": 3.3317050838629166e-06, + "loss": 1.022, + "step": 4486 + }, + { + "epoch": 0.93, + "learning_rate": 3.330727708992094e-06, + "loss": 0.9801, + "step": 4487 + }, + { + "epoch": 0.93, + "learning_rate": 3.3297502985846033e-06, + "loss": 0.8814, + "step": 4488 + }, + { + "epoch": 0.93, + "learning_rate": 3.328772852745465e-06, + "loss": 0.9248, + "step": 4489 + }, + { + "epoch": 0.93, + "learning_rate": 3.327795371579708e-06, + "loss": 1.0082, + "step": 4490 + }, + { + "epoch": 0.93, + "learning_rate": 3.326817855192362e-06, + "loss": 0.7582, + "step": 4491 + }, + { + "epoch": 0.93, + "learning_rate": 3.32584030368846e-06, + "loss": 1.1828, + "step": 4492 + }, + { + "epoch": 0.93, + "learning_rate": 3.324862717173041e-06, + "loss": 1.2155, + "step": 4493 + }, + { + "epoch": 0.93, + "learning_rate": 3.3238850957511466e-06, + "loss": 1.0136, + "step": 4494 + }, + { + "epoch": 0.93, + "learning_rate": 3.3229074395278218e-06, + "loss": 0.9137, + "step": 4495 + }, + { + "epoch": 0.94, + "learning_rate": 3.321929748608115e-06, + "loss": 0.9663, + "step": 4496 + }, + { + "epoch": 0.94, + "learning_rate": 3.3209520230970803e-06, + "loss": 0.9723, + "step": 4497 + }, + { + "epoch": 0.94, + "learning_rate": 3.3199742630997723e-06, + "loss": 0.8419, + "step": 4498 + }, + { + "epoch": 0.94, + "learning_rate": 3.3189964687212543e-06, + "loss": 0.9955, + "step": 4499 + }, + { + "epoch": 0.94, + "learning_rate": 3.3180186400665875e-06, + "loss": 0.9702, + "step": 4500 + }, + { + "epoch": 0.94, + "learning_rate": 3.3170407772408413e-06, + "loss": 1.127, + "step": 4501 + }, + { + "epoch": 0.94, + "learning_rate": 3.3160628803490855e-06, + "loss": 0.9288, + "step": 4502 + }, + { + "epoch": 0.94, + "learning_rate": 3.3150849494963958e-06, + "loss": 0.8358, + "step": 4503 + }, + { + "epoch": 0.94, + "learning_rate": 3.314106984787851e-06, + "loss": 0.9415, + "step": 4504 + }, + { + "epoch": 0.94, + "learning_rate": 3.3131289863285336e-06, + "loss": 0.9213, + "step": 4505 + }, + { + "epoch": 0.94, + "learning_rate": 3.312150954223528e-06, + "loss": 1.0247, + "step": 4506 + }, + { + "epoch": 0.94, + "learning_rate": 3.311172888577925e-06, + "loss": 0.9155, + "step": 4507 + }, + { + "epoch": 0.94, + "learning_rate": 3.3101947894968176e-06, + "loss": 1.0404, + "step": 4508 + }, + { + "epoch": 0.94, + "learning_rate": 3.3092166570853023e-06, + "loss": 1.0482, + "step": 4509 + }, + { + "epoch": 0.94, + "learning_rate": 3.308238491448479e-06, + "loss": 0.8855, + "step": 4510 + }, + { + "epoch": 0.94, + "learning_rate": 3.3072602926914517e-06, + "loss": 0.9378, + "step": 4511 + }, + { + "epoch": 0.94, + "learning_rate": 3.3062820609193274e-06, + "loss": 0.8873, + "step": 4512 + }, + { + "epoch": 0.94, + "learning_rate": 3.3053037962372185e-06, + "loss": 0.9625, + "step": 4513 + }, + { + "epoch": 0.94, + "learning_rate": 3.3043254987502372e-06, + "loss": 0.831, + "step": 4514 + }, + { + "epoch": 0.94, + "learning_rate": 3.3033471685635045e-06, + "loss": 1.1034, + "step": 4515 + }, + { + "epoch": 0.94, + "learning_rate": 3.3023688057821385e-06, + "loss": 0.9047, + "step": 4516 + }, + { + "epoch": 0.94, + "learning_rate": 3.301390410511267e-06, + "loss": 0.9079, + "step": 4517 + }, + { + "epoch": 0.94, + "learning_rate": 3.3004119828560173e-06, + "loss": 0.9455, + "step": 4518 + }, + { + "epoch": 0.94, + "learning_rate": 3.299433522921521e-06, + "loss": 0.9396, + "step": 4519 + }, + { + "epoch": 0.94, + "learning_rate": 3.2984550308129136e-06, + "loss": 0.8139, + "step": 4520 + }, + { + "epoch": 0.94, + "learning_rate": 3.2974765066353356e-06, + "loss": 1.1382, + "step": 4521 + }, + { + "epoch": 0.94, + "learning_rate": 3.296497950493928e-06, + "loss": 1.0385, + "step": 4522 + }, + { + "epoch": 0.94, + "learning_rate": 3.2955193624938365e-06, + "loss": 1.0319, + "step": 4523 + }, + { + "epoch": 0.94, + "learning_rate": 3.29454074274021e-06, + "loss": 1.2297, + "step": 4524 + }, + { + "epoch": 0.94, + "learning_rate": 3.293562091338203e-06, + "loss": 0.8819, + "step": 4525 + }, + { + "epoch": 0.94, + "learning_rate": 3.292583408392969e-06, + "loss": 1.092, + "step": 4526 + }, + { + "epoch": 0.94, + "learning_rate": 3.2916046940096696e-06, + "loss": 0.8909, + "step": 4527 + }, + { + "epoch": 0.94, + "learning_rate": 3.290625948293466e-06, + "loss": 0.9434, + "step": 4528 + }, + { + "epoch": 0.94, + "learning_rate": 3.289647171349525e-06, + "loss": 1.0408, + "step": 4529 + }, + { + "epoch": 0.94, + "learning_rate": 3.2886683632830162e-06, + "loss": 0.996, + "step": 4530 + }, + { + "epoch": 0.94, + "learning_rate": 3.287689524199113e-06, + "loss": 0.8603, + "step": 4531 + }, + { + "epoch": 0.94, + "learning_rate": 3.2867106542029895e-06, + "loss": 0.8225, + "step": 4532 + }, + { + "epoch": 0.94, + "learning_rate": 3.285731753399828e-06, + "loss": 0.9189, + "step": 4533 + }, + { + "epoch": 0.94, + "learning_rate": 3.2847528218948096e-06, + "loss": 0.9443, + "step": 4534 + }, + { + "epoch": 0.94, + "learning_rate": 3.283773859793121e-06, + "loss": 0.9935, + "step": 4535 + }, + { + "epoch": 0.94, + "learning_rate": 3.2827948671999504e-06, + "loss": 0.9593, + "step": 4536 + }, + { + "epoch": 0.94, + "learning_rate": 3.281815844220493e-06, + "loss": 0.9621, + "step": 4537 + }, + { + "epoch": 0.94, + "learning_rate": 3.2808367909599423e-06, + "loss": 0.8421, + "step": 4538 + }, + { + "epoch": 0.94, + "learning_rate": 3.279857707523499e-06, + "loss": 0.9005, + "step": 4539 + }, + { + "epoch": 0.94, + "learning_rate": 3.2788785940163644e-06, + "loss": 1.0368, + "step": 4540 + }, + { + "epoch": 0.94, + "learning_rate": 3.277899450543745e-06, + "loss": 0.8788, + "step": 4541 + }, + { + "epoch": 0.94, + "learning_rate": 3.2769202772108495e-06, + "loss": 0.8493, + "step": 4542 + }, + { + "epoch": 0.94, + "learning_rate": 3.2759410741228905e-06, + "loss": 0.7223, + "step": 4543 + }, + { + "epoch": 0.95, + "learning_rate": 3.274961841385082e-06, + "loss": 0.8452, + "step": 4544 + }, + { + "epoch": 0.95, + "learning_rate": 3.273982579102644e-06, + "loss": 0.9816, + "step": 4545 + }, + { + "epoch": 0.95, + "learning_rate": 3.2730032873807964e-06, + "loss": 1.0151, + "step": 4546 + }, + { + "epoch": 0.95, + "learning_rate": 3.2720239663247656e-06, + "loss": 0.8091, + "step": 4547 + }, + { + "epoch": 0.95, + "learning_rate": 3.271044616039779e-06, + "loss": 0.9972, + "step": 4548 + }, + { + "epoch": 0.95, + "learning_rate": 3.2700652366310683e-06, + "loss": 1.3231, + "step": 4549 + }, + { + "epoch": 0.95, + "learning_rate": 3.269085828203866e-06, + "loss": 0.9313, + "step": 4550 + }, + { + "epoch": 0.95, + "learning_rate": 3.268106390863412e-06, + "loss": 0.9414, + "step": 4551 + }, + { + "epoch": 0.95, + "learning_rate": 3.2671269247149444e-06, + "loss": 0.8726, + "step": 4552 + }, + { + "epoch": 0.95, + "learning_rate": 3.266147429863708e-06, + "loss": 0.9656, + "step": 4553 + }, + { + "epoch": 0.95, + "learning_rate": 3.2651679064149495e-06, + "loss": 0.9921, + "step": 4554 + }, + { + "epoch": 0.95, + "learning_rate": 3.2641883544739182e-06, + "loss": 0.8813, + "step": 4555 + }, + { + "epoch": 0.95, + "learning_rate": 3.263208774145867e-06, + "loss": 0.9237, + "step": 4556 + }, + { + "epoch": 0.95, + "learning_rate": 3.262229165536053e-06, + "loss": 0.9373, + "step": 4557 + }, + { + "epoch": 0.95, + "learning_rate": 3.261249528749732e-06, + "loss": 0.9263, + "step": 4558 + }, + { + "epoch": 0.95, + "learning_rate": 3.2602698638921684e-06, + "loss": 1.2797, + "step": 4559 + }, + { + "epoch": 0.95, + "learning_rate": 3.2592901710686265e-06, + "loss": 0.9703, + "step": 4560 + }, + { + "epoch": 0.95, + "learning_rate": 3.258310450384375e-06, + "loss": 0.9257, + "step": 4561 + }, + { + "epoch": 0.95, + "learning_rate": 3.2573307019446836e-06, + "loss": 1.0086, + "step": 4562 + }, + { + "epoch": 0.95, + "learning_rate": 3.2563509258548257e-06, + "loss": 0.7805, + "step": 4563 + }, + { + "epoch": 0.95, + "learning_rate": 3.2553711222200806e-06, + "loss": 1.0459, + "step": 4564 + }, + { + "epoch": 0.95, + "learning_rate": 3.2543912911457257e-06, + "loss": 0.8289, + "step": 4565 + }, + { + "epoch": 0.95, + "learning_rate": 3.2534114327370453e-06, + "loss": 1.0702, + "step": 4566 + }, + { + "epoch": 0.95, + "learning_rate": 3.2524315470993237e-06, + "loss": 0.849, + "step": 4567 + }, + { + "epoch": 0.95, + "learning_rate": 3.251451634337851e-06, + "loss": 1.0459, + "step": 4568 + }, + { + "epoch": 0.95, + "learning_rate": 3.2504716945579182e-06, + "loss": 0.9575, + "step": 4569 + }, + { + "epoch": 0.95, + "learning_rate": 3.24949172786482e-06, + "loss": 1.195, + "step": 4570 + }, + { + "epoch": 0.95, + "learning_rate": 3.2485117343638526e-06, + "loss": 0.8492, + "step": 4571 + }, + { + "epoch": 0.95, + "learning_rate": 3.2475317141603174e-06, + "loss": 0.7998, + "step": 4572 + }, + { + "epoch": 0.95, + "learning_rate": 3.246551667359517e-06, + "loss": 0.9392, + "step": 4573 + }, + { + "epoch": 0.95, + "learning_rate": 3.245571594066758e-06, + "loss": 0.9397, + "step": 4574 + }, + { + "epoch": 0.95, + "learning_rate": 3.2445914943873483e-06, + "loss": 1.1165, + "step": 4575 + }, + { + "epoch": 0.95, + "learning_rate": 3.243611368426601e-06, + "loss": 0.9677, + "step": 4576 + }, + { + "epoch": 0.95, + "learning_rate": 3.242631216289828e-06, + "loss": 1.1085, + "step": 4577 + }, + { + "epoch": 0.95, + "learning_rate": 3.2416510380823493e-06, + "loss": 0.8094, + "step": 4578 + }, + { + "epoch": 0.95, + "learning_rate": 3.2406708339094827e-06, + "loss": 1.2573, + "step": 4579 + }, + { + "epoch": 0.95, + "learning_rate": 3.239690603876553e-06, + "loss": 1.3212, + "step": 4580 + }, + { + "epoch": 0.95, + "learning_rate": 3.238710348088885e-06, + "loss": 0.742, + "step": 4581 + }, + { + "epoch": 0.95, + "learning_rate": 3.2377300666518066e-06, + "loss": 1.0205, + "step": 4582 + }, + { + "epoch": 0.95, + "learning_rate": 3.23674975967065e-06, + "loss": 0.9621, + "step": 4583 + }, + { + "epoch": 0.95, + "learning_rate": 3.235769427250749e-06, + "loss": 0.7429, + "step": 4584 + }, + { + "epoch": 0.95, + "learning_rate": 3.234789069497439e-06, + "loss": 1.2789, + "step": 4585 + }, + { + "epoch": 0.95, + "learning_rate": 3.2338086865160606e-06, + "loss": 0.9792, + "step": 4586 + }, + { + "epoch": 0.95, + "learning_rate": 3.232828278411955e-06, + "loss": 1.1268, + "step": 4587 + }, + { + "epoch": 0.95, + "learning_rate": 3.2318478452904687e-06, + "loss": 1.0391, + "step": 4588 + }, + { + "epoch": 0.95, + "learning_rate": 3.2308673872569473e-06, + "loss": 0.8298, + "step": 4589 + }, + { + "epoch": 0.95, + "learning_rate": 3.2298869044167416e-06, + "loss": 1.0151, + "step": 4590 + }, + { + "epoch": 0.95, + "learning_rate": 3.2289063968752044e-06, + "loss": 0.8503, + "step": 4591 + }, + { + "epoch": 0.96, + "learning_rate": 3.2279258647376914e-06, + "loss": 1.0696, + "step": 4592 + }, + { + "epoch": 0.96, + "learning_rate": 3.22694530810956e-06, + "loss": 0.9313, + "step": 4593 + }, + { + "epoch": 0.96, + "learning_rate": 3.225964727096172e-06, + "loss": 0.9565, + "step": 4594 + }, + { + "epoch": 0.96, + "learning_rate": 3.2249841218028893e-06, + "loss": 0.8305, + "step": 4595 + }, + { + "epoch": 0.96, + "learning_rate": 3.22400349233508e-06, + "loss": 0.7221, + "step": 4596 + }, + { + "epoch": 0.96, + "learning_rate": 3.223022838798111e-06, + "loss": 0.8977, + "step": 4597 + }, + { + "epoch": 0.96, + "learning_rate": 3.2220421612973536e-06, + "loss": 0.9416, + "step": 4598 + }, + { + "epoch": 0.96, + "learning_rate": 3.2210614599381818e-06, + "loss": 0.862, + "step": 4599 + }, + { + "epoch": 0.96, + "learning_rate": 3.2200807348259724e-06, + "loss": 0.8866, + "step": 4600 + }, + { + "epoch": 0.96, + "learning_rate": 3.2190999860661033e-06, + "loss": 0.9634, + "step": 4601 + }, + { + "epoch": 0.96, + "learning_rate": 3.2181192137639567e-06, + "loss": 0.8615, + "step": 4602 + }, + { + "epoch": 0.96, + "learning_rate": 3.2171384180249154e-06, + "loss": 1.0237, + "step": 4603 + }, + { + "epoch": 0.96, + "learning_rate": 3.216157598954367e-06, + "loss": 0.942, + "step": 4604 + }, + { + "epoch": 0.96, + "learning_rate": 3.2151767566577e-06, + "loss": 0.7564, + "step": 4605 + }, + { + "epoch": 0.96, + "learning_rate": 3.214195891240306e-06, + "loss": 1.0299, + "step": 4606 + }, + { + "epoch": 0.96, + "learning_rate": 3.213215002807579e-06, + "loss": 0.9011, + "step": 4607 + }, + { + "epoch": 0.96, + "learning_rate": 3.2122340914649152e-06, + "loss": 0.9008, + "step": 4608 + }, + { + "epoch": 0.96, + "learning_rate": 3.2112531573177126e-06, + "loss": 0.8336, + "step": 4609 + }, + { + "epoch": 0.96, + "learning_rate": 3.2102722004713738e-06, + "loss": 0.965, + "step": 4610 + }, + { + "epoch": 0.96, + "learning_rate": 3.209291221031302e-06, + "loss": 0.8471, + "step": 4611 + }, + { + "epoch": 0.96, + "learning_rate": 3.2083102191029037e-06, + "loss": 0.8491, + "step": 4612 + }, + { + "epoch": 0.96, + "learning_rate": 3.207329194791587e-06, + "loss": 0.8146, + "step": 4613 + }, + { + "epoch": 0.96, + "learning_rate": 3.2063481482027626e-06, + "loss": 0.878, + "step": 4614 + }, + { + "epoch": 0.96, + "learning_rate": 3.2053670794418454e-06, + "loss": 1.0154, + "step": 4615 + }, + { + "epoch": 0.96, + "learning_rate": 3.20438598861425e-06, + "loss": 0.9615, + "step": 4616 + }, + { + "epoch": 0.96, + "learning_rate": 3.203404875825394e-06, + "loss": 1.0437, + "step": 4617 + }, + { + "epoch": 0.96, + "learning_rate": 3.2024237411806993e-06, + "loss": 1.2525, + "step": 4618 + }, + { + "epoch": 0.96, + "learning_rate": 3.201442584785587e-06, + "loss": 0.8439, + "step": 4619 + }, + { + "epoch": 0.96, + "learning_rate": 3.2004614067454844e-06, + "loss": 0.9196, + "step": 4620 + }, + { + "epoch": 0.96, + "learning_rate": 3.1994802071658176e-06, + "loss": 0.8912, + "step": 4621 + }, + { + "epoch": 0.96, + "learning_rate": 3.1984989861520173e-06, + "loss": 0.8749, + "step": 4622 + }, + { + "epoch": 0.96, + "learning_rate": 3.197517743809514e-06, + "loss": 0.9248, + "step": 4623 + }, + { + "epoch": 0.96, + "learning_rate": 3.1965364802437443e-06, + "loss": 0.932, + "step": 4624 + }, + { + "epoch": 0.96, + "learning_rate": 3.195555195560143e-06, + "loss": 0.7811, + "step": 4625 + }, + { + "epoch": 0.96, + "learning_rate": 3.1945738898641497e-06, + "loss": 1.1132, + "step": 4626 + }, + { + "epoch": 0.96, + "learning_rate": 3.193592563261207e-06, + "loss": 0.914, + "step": 4627 + }, + { + "epoch": 0.96, + "learning_rate": 3.1926112158567563e-06, + "loss": 1.0554, + "step": 4628 + }, + { + "epoch": 0.96, + "learning_rate": 3.191629847756245e-06, + "loss": 0.7158, + "step": 4629 + }, + { + "epoch": 0.96, + "learning_rate": 3.190648459065119e-06, + "loss": 1.035, + "step": 4630 + }, + { + "epoch": 0.96, + "learning_rate": 3.189667049888831e-06, + "loss": 0.8726, + "step": 4631 + }, + { + "epoch": 0.96, + "learning_rate": 3.188685620332831e-06, + "loss": 1.157, + "step": 4632 + }, + { + "epoch": 0.96, + "learning_rate": 3.1877041705025752e-06, + "loss": 0.9441, + "step": 4633 + }, + { + "epoch": 0.96, + "learning_rate": 3.1867227005035193e-06, + "loss": 0.9222, + "step": 4634 + }, + { + "epoch": 0.96, + "learning_rate": 3.1857412104411234e-06, + "loss": 1.0051, + "step": 4635 + }, + { + "epoch": 0.96, + "learning_rate": 3.184759700420846e-06, + "loss": 1.0019, + "step": 4636 + }, + { + "epoch": 0.96, + "learning_rate": 3.1837781705481537e-06, + "loss": 0.9522, + "step": 4637 + }, + { + "epoch": 0.96, + "learning_rate": 3.1827966209285085e-06, + "loss": 0.9113, + "step": 4638 + }, + { + "epoch": 0.96, + "learning_rate": 3.1818150516673813e-06, + "loss": 0.853, + "step": 4639 + }, + { + "epoch": 0.97, + "learning_rate": 3.1808334628702384e-06, + "loss": 1.0399, + "step": 4640 + }, + { + "epoch": 0.97, + "learning_rate": 3.1798518546425536e-06, + "loss": 1.086, + "step": 4641 + }, + { + "epoch": 0.97, + "learning_rate": 3.1788702270897985e-06, + "loss": 1.1529, + "step": 4642 + }, + { + "epoch": 0.97, + "learning_rate": 3.1778885803174516e-06, + "loss": 0.8409, + "step": 4643 + }, + { + "epoch": 0.97, + "learning_rate": 3.176906914430989e-06, + "loss": 0.8463, + "step": 4644 + }, + { + "epoch": 0.97, + "learning_rate": 3.175925229535891e-06, + "loss": 0.7614, + "step": 4645 + }, + { + "epoch": 0.97, + "learning_rate": 3.1749435257376396e-06, + "loss": 0.8355, + "step": 4646 + }, + { + "epoch": 0.97, + "learning_rate": 3.1739618031417193e-06, + "loss": 0.8856, + "step": 4647 + }, + { + "epoch": 0.97, + "learning_rate": 3.172980061853615e-06, + "loss": 0.7738, + "step": 4648 + }, + { + "epoch": 0.97, + "learning_rate": 3.171998301978816e-06, + "loss": 1.0726, + "step": 4649 + }, + { + "epoch": 0.97, + "learning_rate": 3.171016523622811e-06, + "loss": 0.8953, + "step": 4650 + }, + { + "epoch": 0.97, + "learning_rate": 3.170034726891094e-06, + "loss": 0.8518, + "step": 4651 + }, + { + "epoch": 0.97, + "learning_rate": 3.169052911889156e-06, + "loss": 0.8041, + "step": 4652 + }, + { + "epoch": 0.97, + "learning_rate": 3.1680710787224968e-06, + "loss": 0.7901, + "step": 4653 + }, + { + "epoch": 0.97, + "learning_rate": 3.1670892274966105e-06, + "loss": 1.113, + "step": 4654 + }, + { + "epoch": 0.97, + "learning_rate": 3.1661073583169994e-06, + "loss": 0.9278, + "step": 4655 + }, + { + "epoch": 0.97, + "learning_rate": 3.1651254712891637e-06, + "loss": 1.2456, + "step": 4656 + }, + { + "epoch": 0.97, + "learning_rate": 3.164143566518609e-06, + "loss": 0.8414, + "step": 4657 + }, + { + "epoch": 0.97, + "learning_rate": 3.1631616441108388e-06, + "loss": 0.8997, + "step": 4658 + }, + { + "epoch": 0.97, + "learning_rate": 3.1621797041713624e-06, + "loss": 0.8359, + "step": 4659 + }, + { + "epoch": 0.97, + "learning_rate": 3.1611977468056884e-06, + "loss": 1.0168, + "step": 4660 + }, + { + "epoch": 0.97, + "learning_rate": 3.160215772119327e-06, + "loss": 0.8203, + "step": 4661 + }, + { + "epoch": 0.97, + "learning_rate": 3.1592337802177937e-06, + "loss": 1.0731, + "step": 4662 + }, + { + "epoch": 0.97, + "learning_rate": 3.1582517712066e-06, + "loss": 0.9924, + "step": 4663 + }, + { + "epoch": 0.97, + "learning_rate": 3.157269745191267e-06, + "loss": 0.9955, + "step": 4664 + }, + { + "epoch": 0.97, + "learning_rate": 3.15628770227731e-06, + "loss": 0.7654, + "step": 4665 + }, + { + "epoch": 0.97, + "learning_rate": 3.1553056425702506e-06, + "loss": 1.0389, + "step": 4666 + }, + { + "epoch": 0.97, + "learning_rate": 3.154323566175611e-06, + "loss": 0.8118, + "step": 4667 + }, + { + "epoch": 0.97, + "learning_rate": 3.1533414731989155e-06, + "loss": 0.8972, + "step": 4668 + }, + { + "epoch": 0.97, + "learning_rate": 3.152359363745689e-06, + "loss": 0.8256, + "step": 4669 + }, + { + "epoch": 0.97, + "learning_rate": 3.151377237921461e-06, + "loss": 1.015, + "step": 4670 + }, + { + "epoch": 0.97, + "learning_rate": 3.150395095831758e-06, + "loss": 0.7969, + "step": 4671 + }, + { + "epoch": 0.97, + "learning_rate": 3.1494129375821138e-06, + "loss": 1.1691, + "step": 4672 + }, + { + "epoch": 0.97, + "learning_rate": 3.1484307632780594e-06, + "loss": 0.7663, + "step": 4673 + }, + { + "epoch": 0.97, + "learning_rate": 3.147448573025131e-06, + "loss": 0.8382, + "step": 4674 + }, + { + "epoch": 0.97, + "learning_rate": 3.146466366928863e-06, + "loss": 0.8289, + "step": 4675 + }, + { + "epoch": 0.97, + "learning_rate": 3.1454841450947955e-06, + "loss": 0.7489, + "step": 4676 + }, + { + "epoch": 0.97, + "learning_rate": 3.1445019076284654e-06, + "loss": 0.8735, + "step": 4677 + }, + { + "epoch": 0.97, + "learning_rate": 3.143519654635417e-06, + "loss": 0.8679, + "step": 4678 + }, + { + "epoch": 0.97, + "learning_rate": 3.142537386221191e-06, + "loss": 0.9369, + "step": 4679 + }, + { + "epoch": 0.97, + "learning_rate": 3.1415551024913334e-06, + "loss": 1.1702, + "step": 4680 + }, + { + "epoch": 0.97, + "learning_rate": 3.1405728035513903e-06, + "loss": 0.9207, + "step": 4681 + }, + { + "epoch": 0.97, + "learning_rate": 3.1395904895069095e-06, + "loss": 0.8999, + "step": 4682 + }, + { + "epoch": 0.97, + "learning_rate": 3.1386081604634404e-06, + "loss": 0.7872, + "step": 4683 + }, + { + "epoch": 0.97, + "learning_rate": 3.1376258165265346e-06, + "loss": 0.9259, + "step": 4684 + }, + { + "epoch": 0.97, + "learning_rate": 3.1366434578017444e-06, + "loss": 0.8587, + "step": 4685 + }, + { + "epoch": 0.97, + "learning_rate": 3.1356610843946244e-06, + "loss": 0.7489, + "step": 4686 + }, + { + "epoch": 0.97, + "learning_rate": 3.1346786964107297e-06, + "loss": 0.8976, + "step": 4687 + }, + { + "epoch": 0.98, + "learning_rate": 3.1336962939556206e-06, + "loss": 0.8747, + "step": 4688 + }, + { + "epoch": 0.98, + "learning_rate": 3.1327138771348527e-06, + "loss": 0.9891, + "step": 4689 + }, + { + "epoch": 0.98, + "learning_rate": 3.131731446053989e-06, + "loss": 0.9875, + "step": 4690 + }, + { + "epoch": 0.98, + "learning_rate": 3.1307490008185902e-06, + "loss": 1.2291, + "step": 4691 + }, + { + "epoch": 0.98, + "learning_rate": 3.1297665415342213e-06, + "loss": 1.2263, + "step": 4692 + }, + { + "epoch": 0.98, + "learning_rate": 3.128784068306446e-06, + "loss": 0.8335, + "step": 4693 + }, + { + "epoch": 0.98, + "learning_rate": 3.1278015812408323e-06, + "loss": 1.0208, + "step": 4694 + }, + { + "epoch": 0.98, + "learning_rate": 3.126819080442948e-06, + "loss": 1.2896, + "step": 4695 + }, + { + "epoch": 0.98, + "learning_rate": 3.125836566018362e-06, + "loss": 0.9659, + "step": 4696 + }, + { + "epoch": 0.98, + "learning_rate": 3.1248540380726456e-06, + "loss": 0.8773, + "step": 4697 + }, + { + "epoch": 0.98, + "learning_rate": 3.1238714967113732e-06, + "loss": 0.8349, + "step": 4698 + }, + { + "epoch": 0.98, + "learning_rate": 3.1228889420401156e-06, + "loss": 0.8122, + "step": 4699 + }, + { + "epoch": 0.98, + "learning_rate": 3.1219063741644508e-06, + "loss": 0.959, + "step": 4700 + }, + { + "epoch": 0.98, + "learning_rate": 3.120923793189955e-06, + "loss": 0.8049, + "step": 4701 + }, + { + "epoch": 0.98, + "learning_rate": 3.119941199222206e-06, + "loss": 1.2085, + "step": 4702 + }, + { + "epoch": 0.98, + "learning_rate": 3.1189585923667833e-06, + "loss": 0.8594, + "step": 4703 + }, + { + "epoch": 0.98, + "learning_rate": 3.1179759727292692e-06, + "loss": 0.9976, + "step": 4704 + }, + { + "epoch": 0.98, + "learning_rate": 3.1169933404152443e-06, + "loss": 0.7043, + "step": 4705 + }, + { + "epoch": 0.98, + "learning_rate": 3.116010695530294e-06, + "loss": 0.7456, + "step": 4706 + }, + { + "epoch": 0.98, + "learning_rate": 3.1150280381800026e-06, + "loss": 0.7913, + "step": 4707 + }, + { + "epoch": 0.98, + "learning_rate": 3.114045368469957e-06, + "loss": 0.8431, + "step": 4708 + }, + { + "epoch": 0.98, + "learning_rate": 3.1130626865057437e-06, + "loss": 0.8438, + "step": 4709 + }, + { + "epoch": 0.98, + "learning_rate": 3.112079992392954e-06, + "loss": 1.0413, + "step": 4710 + }, + { + "epoch": 0.98, + "learning_rate": 3.111097286237176e-06, + "loss": 0.9316, + "step": 4711 + }, + { + "epoch": 0.98, + "learning_rate": 3.1101145681440036e-06, + "loss": 0.9792, + "step": 4712 + }, + { + "epoch": 0.98, + "learning_rate": 3.109131838219028e-06, + "loss": 0.9227, + "step": 4713 + }, + { + "epoch": 0.98, + "learning_rate": 3.1081490965678447e-06, + "loss": 1.0233, + "step": 4714 + }, + { + "epoch": 0.98, + "learning_rate": 3.107166343296048e-06, + "loss": 1.0246, + "step": 4715 + }, + { + "epoch": 0.98, + "learning_rate": 3.1061835785092357e-06, + "loss": 0.7918, + "step": 4716 + }, + { + "epoch": 0.98, + "learning_rate": 3.105200802313005e-06, + "loss": 1.0635, + "step": 4717 + }, + { + "epoch": 0.98, + "learning_rate": 3.1042180148129566e-06, + "loss": 0.8636, + "step": 4718 + }, + { + "epoch": 0.98, + "learning_rate": 3.103235216114689e-06, + "loss": 1.1091, + "step": 4719 + }, + { + "epoch": 0.98, + "learning_rate": 3.102252406323806e-06, + "loss": 1.1483, + "step": 4720 + }, + { + "epoch": 0.98, + "learning_rate": 3.1012695855459076e-06, + "loss": 0.7466, + "step": 4721 + }, + { + "epoch": 0.98, + "learning_rate": 3.1002867538866007e-06, + "loss": 0.9469, + "step": 4722 + }, + { + "epoch": 0.98, + "learning_rate": 3.0993039114514885e-06, + "loss": 0.9171, + "step": 4723 + }, + { + "epoch": 0.98, + "learning_rate": 3.0983210583461794e-06, + "loss": 0.7735, + "step": 4724 + }, + { + "epoch": 0.98, + "learning_rate": 3.0973381946762777e-06, + "loss": 0.6681, + "step": 4725 + }, + { + "epoch": 0.98, + "learning_rate": 3.0963553205473957e-06, + "loss": 0.8426, + "step": 4726 + }, + { + "epoch": 0.98, + "learning_rate": 3.0953724360651402e-06, + "loss": 0.8872, + "step": 4727 + }, + { + "epoch": 0.98, + "learning_rate": 3.0943895413351244e-06, + "loss": 1.2329, + "step": 4728 + }, + { + "epoch": 0.98, + "learning_rate": 3.0934066364629575e-06, + "loss": 0.994, + "step": 4729 + }, + { + "epoch": 0.98, + "learning_rate": 3.0924237215542554e-06, + "loss": 0.868, + "step": 4730 + }, + { + "epoch": 0.98, + "learning_rate": 3.0914407967146306e-06, + "loss": 1.073, + "step": 4731 + }, + { + "epoch": 0.98, + "learning_rate": 3.090457862049699e-06, + "loss": 1.0568, + "step": 4732 + }, + { + "epoch": 0.98, + "learning_rate": 3.0894749176650756e-06, + "loss": 1.0508, + "step": 4733 + }, + { + "epoch": 0.98, + "learning_rate": 3.08849196366638e-06, + "loss": 0.8911, + "step": 4734 + }, + { + "epoch": 0.98, + "learning_rate": 3.0875090001592283e-06, + "loss": 0.9374, + "step": 4735 + }, + { + "epoch": 0.99, + "learning_rate": 3.0865260272492412e-06, + "loss": 0.8447, + "step": 4736 + }, + { + "epoch": 0.99, + "learning_rate": 3.085543045042038e-06, + "loss": 0.9113, + "step": 4737 + }, + { + "epoch": 0.99, + "learning_rate": 3.084560053643242e-06, + "loss": 0.6478, + "step": 4738 + }, + { + "epoch": 0.99, + "learning_rate": 3.083577053158473e-06, + "loss": 1.1263, + "step": 4739 + }, + { + "epoch": 0.99, + "learning_rate": 3.082594043693356e-06, + "loss": 0.749, + "step": 4740 + }, + { + "epoch": 0.99, + "learning_rate": 3.081611025353515e-06, + "loss": 1.0218, + "step": 4741 + }, + { + "epoch": 0.99, + "learning_rate": 3.080627998244576e-06, + "loss": 0.9243, + "step": 4742 + }, + { + "epoch": 0.99, + "learning_rate": 3.0796449624721632e-06, + "loss": 0.7994, + "step": 4743 + }, + { + "epoch": 0.99, + "learning_rate": 3.078661918141905e-06, + "loss": 0.8497, + "step": 4744 + }, + { + "epoch": 0.99, + "learning_rate": 3.07767886535943e-06, + "loss": 0.9142, + "step": 4745 + }, + { + "epoch": 0.99, + "learning_rate": 3.076695804230366e-06, + "loss": 0.7573, + "step": 4746 + }, + { + "epoch": 0.99, + "learning_rate": 3.0757127348603427e-06, + "loss": 0.9221, + "step": 4747 + }, + { + "epoch": 0.99, + "learning_rate": 3.0747296573549922e-06, + "loss": 1.0157, + "step": 4748 + }, + { + "epoch": 0.99, + "learning_rate": 3.073746571819945e-06, + "loss": 0.8952, + "step": 4749 + }, + { + "epoch": 0.99, + "learning_rate": 3.0727634783608347e-06, + "loss": 1.0426, + "step": 4750 + }, + { + "epoch": 0.99, + "learning_rate": 3.071780377083293e-06, + "loss": 1.0076, + "step": 4751 + }, + { + "epoch": 0.99, + "learning_rate": 3.0707972680929552e-06, + "loss": 0.8209, + "step": 4752 + }, + { + "epoch": 0.99, + "learning_rate": 3.069814151495456e-06, + "loss": 0.9645, + "step": 4753 + }, + { + "epoch": 0.99, + "learning_rate": 3.0688310273964315e-06, + "loss": 1.032, + "step": 4754 + }, + { + "epoch": 0.99, + "learning_rate": 3.067847895901517e-06, + "loss": 0.8727, + "step": 4755 + }, + { + "epoch": 0.99, + "learning_rate": 3.066864757116352e-06, + "loss": 0.9957, + "step": 4756 + }, + { + "epoch": 0.99, + "learning_rate": 3.0658816111465727e-06, + "loss": 0.9256, + "step": 4757 + }, + { + "epoch": 0.99, + "learning_rate": 3.064898458097819e-06, + "loss": 0.8241, + "step": 4758 + }, + { + "epoch": 0.99, + "learning_rate": 3.0639152980757318e-06, + "loss": 1.0442, + "step": 4759 + }, + { + "epoch": 0.99, + "learning_rate": 3.0629321311859493e-06, + "loss": 0.8581, + "step": 4760 + }, + { + "epoch": 0.99, + "learning_rate": 3.0619489575341147e-06, + "loss": 0.7346, + "step": 4761 + }, + { + "epoch": 0.99, + "learning_rate": 3.060965777225868e-06, + "loss": 0.8647, + "step": 4762 + }, + { + "epoch": 0.99, + "learning_rate": 3.059982590366854e-06, + "loss": 0.8274, + "step": 4763 + }, + { + "epoch": 0.99, + "learning_rate": 3.0589993970627136e-06, + "loss": 1.1737, + "step": 4764 + }, + { + "epoch": 0.99, + "learning_rate": 3.058016197419093e-06, + "loss": 1.129, + "step": 4765 + }, + { + "epoch": 0.99, + "learning_rate": 3.0570329915416357e-06, + "loss": 1.0323, + "step": 4766 + }, + { + "epoch": 0.99, + "learning_rate": 3.0560497795359884e-06, + "loss": 0.8479, + "step": 4767 + }, + { + "epoch": 0.99, + "learning_rate": 3.0550665615077953e-06, + "loss": 1.0665, + "step": 4768 + }, + { + "epoch": 0.99, + "learning_rate": 3.054083337562705e-06, + "loss": 0.7783, + "step": 4769 + }, + { + "epoch": 0.99, + "learning_rate": 3.053100107806363e-06, + "loss": 0.7721, + "step": 4770 + }, + { + "epoch": 0.99, + "learning_rate": 3.0521168723444194e-06, + "loss": 1.1013, + "step": 4771 + }, + { + "epoch": 0.99, + "learning_rate": 3.0511336312825207e-06, + "loss": 0.8508, + "step": 4772 + }, + { + "epoch": 0.99, + "learning_rate": 3.050150384726317e-06, + "loss": 0.8233, + "step": 4773 + }, + { + "epoch": 0.99, + "learning_rate": 3.049167132781458e-06, + "loss": 0.8748, + "step": 4774 + }, + { + "epoch": 0.99, + "learning_rate": 3.048183875553595e-06, + "loss": 1.1237, + "step": 4775 + }, + { + "epoch": 0.99, + "learning_rate": 3.047200613148377e-06, + "loss": 1.0575, + "step": 4776 + }, + { + "epoch": 0.99, + "learning_rate": 3.046217345671457e-06, + "loss": 0.861, + "step": 4777 + }, + { + "epoch": 0.99, + "learning_rate": 3.0452340732284858e-06, + "loss": 1.0142, + "step": 4778 + }, + { + "epoch": 0.99, + "learning_rate": 3.0442507959251172e-06, + "loss": 0.7891, + "step": 4779 + }, + { + "epoch": 0.99, + "learning_rate": 3.043267513867004e-06, + "loss": 0.8309, + "step": 4780 + }, + { + "epoch": 0.99, + "learning_rate": 3.0422842271597997e-06, + "loss": 0.949, + "step": 4781 + }, + { + "epoch": 0.99, + "learning_rate": 3.041300935909157e-06, + "loss": 0.9989, + "step": 4782 + }, + { + "epoch": 0.99, + "learning_rate": 3.0403176402207325e-06, + "loss": 0.8259, + "step": 4783 + }, + { + "epoch": 1.0, + "learning_rate": 3.0393343402001805e-06, + "loss": 0.9012, + "step": 4784 + }, + { + "epoch": 1.0, + "learning_rate": 3.038351035953156e-06, + "loss": 1.0597, + "step": 4785 + }, + { + "epoch": 1.0, + "learning_rate": 3.037367727585316e-06, + "loss": 0.9383, + "step": 4786 + }, + { + "epoch": 1.0, + "learning_rate": 3.0363844152023165e-06, + "loss": 0.8889, + "step": 4787 + }, + { + "epoch": 1.0, + "learning_rate": 3.0354010989098135e-06, + "loss": 0.8427, + "step": 4788 + }, + { + "epoch": 1.0, + "learning_rate": 3.034417778813465e-06, + "loss": 0.9037, + "step": 4789 + }, + { + "epoch": 1.0, + "learning_rate": 3.0334344550189284e-06, + "loss": 0.8397, + "step": 4790 + }, + { + "epoch": 1.0, + "learning_rate": 3.032451127631863e-06, + "loss": 1.0384, + "step": 4791 + }, + { + "epoch": 1.0, + "learning_rate": 3.031467796757925e-06, + "loss": 0.9856, + "step": 4792 + }, + { + "epoch": 1.0, + "learning_rate": 3.0304844625027757e-06, + "loss": 0.9437, + "step": 4793 + }, + { + "epoch": 1.0, + "learning_rate": 3.029501124972072e-06, + "loss": 1.0169, + "step": 4794 + }, + { + "epoch": 1.0, + "learning_rate": 3.0285177842714758e-06, + "loss": 0.7272, + "step": 4795 + }, + { + "epoch": 1.0, + "learning_rate": 3.027534440506644e-06, + "loss": 0.8877, + "step": 4796 + }, + { + "epoch": 1.0, + "learning_rate": 3.0265510937832402e-06, + "loss": 1.0549, + "step": 4797 + }, + { + "epoch": 1.0, + "learning_rate": 3.025567744206922e-06, + "loss": 0.8514, + "step": 4798 + }, + { + "epoch": 1.0, + "learning_rate": 3.024584391883353e-06, + "loss": 0.7945, + "step": 4799 + }, + { + "epoch": 1.0, + "learning_rate": 3.0236010369181914e-06, + "loss": 0.7633, + "step": 4800 + }, + { + "epoch": 1.0, + "learning_rate": 3.0226176794171006e-06, + "loss": 0.9479, + "step": 4801 + }, + { + "epoch": 1.0, + "learning_rate": 3.0216343194857417e-06, + "loss": 0.8198, + "step": 4802 + }, + { + "epoch": 1.0, + "learning_rate": 3.0206509572297768e-06, + "loss": 0.71, + "step": 4803 + }, + { + "epoch": 1.0, + "learning_rate": 3.019667592754868e-06, + "loss": 0.8367, + "step": 4804 + }, + { + "epoch": 1.0, + "learning_rate": 3.018684226166678e-06, + "loss": 0.9796, + "step": 4805 + }, + { + "epoch": 1.0, + "learning_rate": 3.0177008575708684e-06, + "loss": 0.7916, + "step": 4806 + }, + { + "epoch": 1.0, + "learning_rate": 3.016717487073104e-06, + "loss": 1.0066, + "step": 4807 + }, + { + "epoch": 1.0, + "learning_rate": 3.015734114779046e-06, + "loss": 0.8551, + "step": 4808 + }, + { + "epoch": 1.0, + "learning_rate": 3.0147507407943593e-06, + "loss": 1.0087, + "step": 4809 + }, + { + "epoch": 1.0, + "learning_rate": 3.013767365224706e-06, + "loss": 0.8467, + "step": 4810 + }, + { + "epoch": 1.0, + "learning_rate": 3.01278398817575e-06, + "loss": 0.7397, + "step": 4811 + }, + { + "epoch": 1.0, + "learning_rate": 3.0118006097531555e-06, + "loss": 0.801, + "step": 4812 + }, + { + "epoch": 1.0, + "learning_rate": 3.010817230062587e-06, + "loss": 1.0023, + "step": 4813 + }, + { + "epoch": 1.0, + "learning_rate": 3.009833849209707e-06, + "loss": 0.8045, + "step": 4814 + }, + { + "epoch": 1.0, + "learning_rate": 3.0088504673001814e-06, + "loss": 0.8411, + "step": 4815 + }, + { + "epoch": 1.0, + "learning_rate": 3.0078670844396733e-06, + "loss": 0.7356, + "step": 4816 + }, + { + "epoch": 1.0, + "learning_rate": 3.006883700733848e-06, + "loss": 0.8737, + "step": 4817 + }, + { + "epoch": 1.0, + "learning_rate": 3.0059003162883683e-06, + "loss": 0.7862, + "step": 4818 + }, + { + "epoch": 1.0, + "learning_rate": 3.004916931208901e-06, + "loss": 0.8311, + "step": 4819 + }, + { + "epoch": 1.0, + "learning_rate": 3.00393354560111e-06, + "loss": 1.0427, + "step": 4820 + }, + { + "epoch": 1.0, + "learning_rate": 3.0029501595706593e-06, + "loss": 0.9415, + "step": 4821 + }, + { + "epoch": 1.0, + "learning_rate": 3.0019667732232146e-06, + "loss": 0.5958, + "step": 4822 + }, + { + "epoch": 1.0, + "learning_rate": 3.00098338666444e-06, + "loss": 0.9194, + "step": 4823 + }, + { + "epoch": 1.0, + "learning_rate": 3e-06, + "loss": 0.878, + "step": 4824 + }, + { + "epoch": 1.0, + "learning_rate": 2.999016613335561e-06, + "loss": 0.9068, + "step": 4825 + }, + { + "epoch": 1.0, + "learning_rate": 2.9980332267767855e-06, + "loss": 0.6651, + "step": 4826 + }, + { + "epoch": 1.0, + "learning_rate": 2.997049840429341e-06, + "loss": 0.8941, + "step": 4827 + }, + { + "epoch": 1.0, + "learning_rate": 2.9960664543988908e-06, + "loss": 0.9203, + "step": 4828 + }, + { + "epoch": 1.0, + "learning_rate": 2.9950830687911e-06, + "loss": 0.8563, + "step": 4829 + }, + { + "epoch": 1.0, + "learning_rate": 2.9940996837116314e-06, + "loss": 0.8812, + "step": 4830 + }, + { + "epoch": 1.0, + "learning_rate": 2.993116299266153e-06, + "loss": 1.1905, + "step": 4831 + }, + { + "epoch": 1.0, + "learning_rate": 2.9921329155603277e-06, + "loss": 0.8708, + "step": 4832 + }, + { + "epoch": 1.01, + "learning_rate": 2.99114953269982e-06, + "loss": 0.8801, + "step": 4833 + }, + { + "epoch": 1.01, + "learning_rate": 2.990166150790293e-06, + "loss": 0.7808, + "step": 4834 + }, + { + "epoch": 1.01, + "learning_rate": 2.9891827699374137e-06, + "loss": 0.8423, + "step": 4835 + }, + { + "epoch": 1.01, + "learning_rate": 2.988199390246845e-06, + "loss": 0.9464, + "step": 4836 + }, + { + "epoch": 1.01, + "learning_rate": 2.987216011824251e-06, + "loss": 0.9671, + "step": 4837 + }, + { + "epoch": 1.01, + "learning_rate": 2.9862326347752942e-06, + "loss": 1.0859, + "step": 4838 + }, + { + "epoch": 1.01, + "learning_rate": 2.9852492592056417e-06, + "loss": 0.9065, + "step": 4839 + }, + { + "epoch": 1.01, + "learning_rate": 2.9842658852209542e-06, + "loss": 1.1341, + "step": 4840 + }, + { + "epoch": 1.01, + "learning_rate": 2.9832825129268975e-06, + "loss": 0.9484, + "step": 4841 + }, + { + "epoch": 1.01, + "learning_rate": 2.9822991424291313e-06, + "loss": 0.8426, + "step": 4842 + }, + { + "epoch": 1.01, + "learning_rate": 2.9813157738333225e-06, + "loss": 1.0961, + "step": 4843 + }, + { + "epoch": 1.01, + "learning_rate": 2.9803324072451326e-06, + "loss": 1.1208, + "step": 4844 + }, + { + "epoch": 1.01, + "learning_rate": 2.9793490427702242e-06, + "loss": 0.8845, + "step": 4845 + }, + { + "epoch": 1.01, + "learning_rate": 2.978365680514258e-06, + "loss": 0.8944, + "step": 4846 + }, + { + "epoch": 1.01, + "learning_rate": 2.9773823205828996e-06, + "loss": 0.9701, + "step": 4847 + }, + { + "epoch": 1.01, + "learning_rate": 2.9763989630818096e-06, + "loss": 0.7674, + "step": 4848 + }, + { + "epoch": 1.01, + "learning_rate": 2.9754156081166485e-06, + "loss": 0.9616, + "step": 4849 + }, + { + "epoch": 1.01, + "learning_rate": 2.9744322557930774e-06, + "loss": 1.1004, + "step": 4850 + }, + { + "epoch": 1.01, + "learning_rate": 2.9734489062167603e-06, + "loss": 1.137, + "step": 4851 + }, + { + "epoch": 1.01, + "learning_rate": 2.972465559493356e-06, + "loss": 0.9499, + "step": 4852 + }, + { + "epoch": 1.01, + "learning_rate": 2.9714822157285256e-06, + "loss": 0.9114, + "step": 4853 + }, + { + "epoch": 1.01, + "learning_rate": 2.9704988750279275e-06, + "loss": 1.1018, + "step": 4854 + }, + { + "epoch": 1.01, + "learning_rate": 2.969515537497225e-06, + "loss": 0.8753, + "step": 4855 + }, + { + "epoch": 1.01, + "learning_rate": 2.9685322032420753e-06, + "loss": 0.6706, + "step": 4856 + }, + { + "epoch": 1.01, + "learning_rate": 2.9675488723681385e-06, + "loss": 0.9373, + "step": 4857 + }, + { + "epoch": 1.01, + "learning_rate": 2.9665655449810713e-06, + "loss": 0.8715, + "step": 4858 + }, + { + "epoch": 1.01, + "learning_rate": 2.9655822211865355e-06, + "loss": 0.9604, + "step": 4859 + }, + { + "epoch": 1.01, + "learning_rate": 2.9645989010901875e-06, + "loss": 0.8088, + "step": 4860 + }, + { + "epoch": 1.01, + "learning_rate": 2.9636155847976853e-06, + "loss": 0.863, + "step": 4861 + }, + { + "epoch": 1.01, + "learning_rate": 2.962632272414684e-06, + "loss": 0.7965, + "step": 4862 + }, + { + "epoch": 1.01, + "learning_rate": 2.961648964046844e-06, + "loss": 0.964, + "step": 4863 + }, + { + "epoch": 1.01, + "learning_rate": 2.96066565979982e-06, + "loss": 0.7843, + "step": 4864 + }, + { + "epoch": 1.01, + "learning_rate": 2.9596823597792685e-06, + "loss": 0.8575, + "step": 4865 + }, + { + "epoch": 1.01, + "learning_rate": 2.9586990640908427e-06, + "loss": 0.7679, + "step": 4866 + }, + { + "epoch": 1.01, + "learning_rate": 2.9577157728402013e-06, + "loss": 0.7675, + "step": 4867 + }, + { + "epoch": 1.01, + "learning_rate": 2.9567324861329968e-06, + "loss": 0.9986, + "step": 4868 + }, + { + "epoch": 1.01, + "learning_rate": 2.9557492040748833e-06, + "loss": 0.8811, + "step": 4869 + }, + { + "epoch": 1.01, + "learning_rate": 2.954765926771514e-06, + "loss": 0.935, + "step": 4870 + }, + { + "epoch": 1.01, + "learning_rate": 2.9537826543285437e-06, + "loss": 0.9139, + "step": 4871 + }, + { + "epoch": 1.01, + "learning_rate": 2.952799386851624e-06, + "loss": 0.8914, + "step": 4872 + }, + { + "epoch": 1.01, + "learning_rate": 2.9518161244464066e-06, + "loss": 0.943, + "step": 4873 + }, + { + "epoch": 1.01, + "learning_rate": 2.950832867218542e-06, + "loss": 1.014, + "step": 4874 + }, + { + "epoch": 1.01, + "learning_rate": 2.9498496152736833e-06, + "loss": 0.8065, + "step": 4875 + }, + { + "epoch": 1.01, + "learning_rate": 2.9488663687174803e-06, + "loss": 0.7588, + "step": 4876 + }, + { + "epoch": 1.01, + "learning_rate": 2.947883127655582e-06, + "loss": 1.0231, + "step": 4877 + }, + { + "epoch": 1.01, + "learning_rate": 2.9468998921936365e-06, + "loss": 1.163, + "step": 4878 + }, + { + "epoch": 1.01, + "learning_rate": 2.9459166624372953e-06, + "loss": 1.0346, + "step": 4879 + }, + { + "epoch": 1.01, + "learning_rate": 2.944933438492205e-06, + "loss": 1.119, + "step": 4880 + }, + { + "epoch": 1.02, + "learning_rate": 2.9439502204640126e-06, + "loss": 0.8924, + "step": 4881 + }, + { + "epoch": 1.02, + "learning_rate": 2.9429670084583636e-06, + "loss": 0.8066, + "step": 4882 + }, + { + "epoch": 1.02, + "learning_rate": 2.9419838025809073e-06, + "loss": 1.0171, + "step": 4883 + }, + { + "epoch": 1.02, + "learning_rate": 2.941000602937287e-06, + "loss": 0.805, + "step": 4884 + }, + { + "epoch": 1.02, + "learning_rate": 2.940017409633148e-06, + "loss": 1.0707, + "step": 4885 + }, + { + "epoch": 1.02, + "learning_rate": 2.939034222774132e-06, + "loss": 0.8886, + "step": 4886 + }, + { + "epoch": 1.02, + "learning_rate": 2.9380510424658863e-06, + "loss": 0.7881, + "step": 4887 + }, + { + "epoch": 1.02, + "learning_rate": 2.9370678688140513e-06, + "loss": 1.1671, + "step": 4888 + }, + { + "epoch": 1.02, + "learning_rate": 2.9360847019242697e-06, + "loss": 0.7981, + "step": 4889 + }, + { + "epoch": 1.02, + "learning_rate": 2.9351015419021808e-06, + "loss": 0.9717, + "step": 4890 + }, + { + "epoch": 1.02, + "learning_rate": 2.934118388853428e-06, + "loss": 1.052, + "step": 4891 + }, + { + "epoch": 1.02, + "learning_rate": 2.9331352428836494e-06, + "loss": 0.8204, + "step": 4892 + }, + { + "epoch": 1.02, + "learning_rate": 2.932152104098483e-06, + "loss": 1.1149, + "step": 4893 + }, + { + "epoch": 1.02, + "learning_rate": 2.9311689726035695e-06, + "loss": 0.7495, + "step": 4894 + }, + { + "epoch": 1.02, + "learning_rate": 2.930185848504545e-06, + "loss": 0.7219, + "step": 4895 + }, + { + "epoch": 1.02, + "learning_rate": 2.9292027319070458e-06, + "loss": 1.0186, + "step": 4896 + }, + { + "epoch": 1.02, + "learning_rate": 2.9282196229167073e-06, + "loss": 0.8162, + "step": 4897 + }, + { + "epoch": 1.02, + "learning_rate": 2.927236521639166e-06, + "loss": 1.0066, + "step": 4898 + }, + { + "epoch": 1.02, + "learning_rate": 2.9262534281800554e-06, + "loss": 1.0199, + "step": 4899 + }, + { + "epoch": 1.02, + "learning_rate": 2.9252703426450088e-06, + "loss": 1.0815, + "step": 4900 + }, + { + "epoch": 1.02, + "learning_rate": 2.9242872651396566e-06, + "loss": 0.7699, + "step": 4901 + }, + { + "epoch": 1.02, + "learning_rate": 2.9233041957696346e-06, + "loss": 1.0586, + "step": 4902 + }, + { + "epoch": 1.02, + "learning_rate": 2.922321134640571e-06, + "loss": 0.8827, + "step": 4903 + }, + { + "epoch": 1.02, + "learning_rate": 2.921338081858096e-06, + "loss": 0.8453, + "step": 4904 + }, + { + "epoch": 1.02, + "learning_rate": 2.920355037527837e-06, + "loss": 0.9465, + "step": 4905 + }, + { + "epoch": 1.02, + "learning_rate": 2.9193720017554246e-06, + "loss": 0.8433, + "step": 4906 + }, + { + "epoch": 1.02, + "learning_rate": 2.918388974646485e-06, + "loss": 0.8208, + "step": 4907 + }, + { + "epoch": 1.02, + "learning_rate": 2.9174059563066444e-06, + "loss": 0.8775, + "step": 4908 + }, + { + "epoch": 1.02, + "learning_rate": 2.9164229468415265e-06, + "loss": 0.8866, + "step": 4909 + }, + { + "epoch": 1.02, + "learning_rate": 2.9154399463567583e-06, + "loss": 0.845, + "step": 4910 + }, + { + "epoch": 1.02, + "learning_rate": 2.914456954957962e-06, + "loss": 1.0267, + "step": 4911 + }, + { + "epoch": 1.02, + "learning_rate": 2.9134739727507598e-06, + "loss": 0.8045, + "step": 4912 + }, + { + "epoch": 1.02, + "learning_rate": 2.9124909998407714e-06, + "loss": 0.8161, + "step": 4913 + }, + { + "epoch": 1.02, + "learning_rate": 2.9115080363336208e-06, + "loss": 0.9978, + "step": 4914 + }, + { + "epoch": 1.02, + "learning_rate": 2.9105250823349245e-06, + "loss": 0.9319, + "step": 4915 + }, + { + "epoch": 1.02, + "learning_rate": 2.9095421379503025e-06, + "loss": 0.7482, + "step": 4916 + }, + { + "epoch": 1.02, + "learning_rate": 2.9085592032853695e-06, + "loss": 0.8058, + "step": 4917 + }, + { + "epoch": 1.02, + "learning_rate": 2.907576278445745e-06, + "loss": 0.855, + "step": 4918 + }, + { + "epoch": 1.02, + "learning_rate": 2.906593363537043e-06, + "loss": 1.1925, + "step": 4919 + }, + { + "epoch": 1.02, + "learning_rate": 2.9056104586648775e-06, + "loss": 1.0621, + "step": 4920 + }, + { + "epoch": 1.02, + "learning_rate": 2.90462756393486e-06, + "loss": 1.0863, + "step": 4921 + }, + { + "epoch": 1.02, + "learning_rate": 2.903644679452605e-06, + "loss": 0.9526, + "step": 4922 + }, + { + "epoch": 1.02, + "learning_rate": 2.9026618053237224e-06, + "loss": 0.7859, + "step": 4923 + }, + { + "epoch": 1.02, + "learning_rate": 2.901678941653822e-06, + "loss": 0.851, + "step": 4924 + }, + { + "epoch": 1.02, + "learning_rate": 2.900696088548511e-06, + "loss": 1.093, + "step": 4925 + }, + { + "epoch": 1.02, + "learning_rate": 2.8997132461133994e-06, + "loss": 0.8763, + "step": 4926 + }, + { + "epoch": 1.02, + "learning_rate": 2.8987304144540925e-06, + "loss": 0.8493, + "step": 4927 + }, + { + "epoch": 1.02, + "learning_rate": 2.8977475936761953e-06, + "loss": 1.069, + "step": 4928 + }, + { + "epoch": 1.03, + "learning_rate": 2.8967647838853114e-06, + "loss": 0.6994, + "step": 4929 + }, + { + "epoch": 1.03, + "learning_rate": 2.895781985187044e-06, + "loss": 0.6432, + "step": 4930 + }, + { + "epoch": 1.03, + "learning_rate": 2.894799197686995e-06, + "loss": 1.2045, + "step": 4931 + }, + { + "epoch": 1.03, + "learning_rate": 2.8938164214907653e-06, + "loss": 0.905, + "step": 4932 + }, + { + "epoch": 1.03, + "learning_rate": 2.8928336567039527e-06, + "loss": 0.999, + "step": 4933 + }, + { + "epoch": 1.03, + "learning_rate": 2.891850903432156e-06, + "loss": 0.9282, + "step": 4934 + }, + { + "epoch": 1.03, + "learning_rate": 2.8908681617809726e-06, + "loss": 0.9901, + "step": 4935 + }, + { + "epoch": 1.03, + "learning_rate": 2.8898854318559978e-06, + "loss": 1.0452, + "step": 4936 + }, + { + "epoch": 1.03, + "learning_rate": 2.8889027137628243e-06, + "loss": 0.8073, + "step": 4937 + }, + { + "epoch": 1.03, + "learning_rate": 2.8879200076070467e-06, + "loss": 0.976, + "step": 4938 + }, + { + "epoch": 1.03, + "learning_rate": 2.8869373134942564e-06, + "loss": 0.9426, + "step": 4939 + }, + { + "epoch": 1.03, + "learning_rate": 2.8859546315300443e-06, + "loss": 0.9028, + "step": 4940 + }, + { + "epoch": 1.03, + "learning_rate": 2.884971961819998e-06, + "loss": 0.8477, + "step": 4941 + }, + { + "epoch": 1.03, + "learning_rate": 2.883989304469706e-06, + "loss": 0.9899, + "step": 4942 + }, + { + "epoch": 1.03, + "learning_rate": 2.883006659584756e-06, + "loss": 0.9921, + "step": 4943 + }, + { + "epoch": 1.03, + "learning_rate": 2.882024027270732e-06, + "loss": 1.0557, + "step": 4944 + }, + { + "epoch": 1.03, + "learning_rate": 2.8810414076332173e-06, + "loss": 1.0046, + "step": 4945 + }, + { + "epoch": 1.03, + "learning_rate": 2.8800588007777946e-06, + "loss": 0.8501, + "step": 4946 + }, + { + "epoch": 1.03, + "learning_rate": 2.879076206810046e-06, + "loss": 0.8767, + "step": 4947 + }, + { + "epoch": 1.03, + "learning_rate": 2.87809362583555e-06, + "loss": 0.8047, + "step": 4948 + }, + { + "epoch": 1.03, + "learning_rate": 2.8771110579598846e-06, + "loss": 0.8906, + "step": 4949 + }, + { + "epoch": 1.03, + "learning_rate": 2.8761285032886278e-06, + "loss": 0.8758, + "step": 4950 + }, + { + "epoch": 1.03, + "learning_rate": 2.8751459619273545e-06, + "loss": 0.9637, + "step": 4951 + }, + { + "epoch": 1.03, + "learning_rate": 2.8741634339816394e-06, + "loss": 0.8348, + "step": 4952 + }, + { + "epoch": 1.03, + "learning_rate": 2.873180919557053e-06, + "loss": 0.8219, + "step": 4953 + }, + { + "epoch": 1.03, + "learning_rate": 2.8721984187591683e-06, + "loss": 0.9589, + "step": 4954 + }, + { + "epoch": 1.03, + "learning_rate": 2.871215931693555e-06, + "loss": 0.8255, + "step": 4955 + }, + { + "epoch": 1.03, + "learning_rate": 2.87023345846578e-06, + "loss": 0.9057, + "step": 4956 + }, + { + "epoch": 1.03, + "learning_rate": 2.8692509991814103e-06, + "loss": 0.8457, + "step": 4957 + }, + { + "epoch": 1.03, + "learning_rate": 2.8682685539460116e-06, + "loss": 0.9313, + "step": 4958 + }, + { + "epoch": 1.03, + "learning_rate": 2.867286122865148e-06, + "loss": 0.8886, + "step": 4959 + }, + { + "epoch": 1.03, + "learning_rate": 2.8663037060443804e-06, + "loss": 0.815, + "step": 4960 + }, + { + "epoch": 1.03, + "learning_rate": 2.8653213035892704e-06, + "loss": 0.7729, + "step": 4961 + }, + { + "epoch": 1.03, + "learning_rate": 2.8643389156053762e-06, + "loss": 0.9942, + "step": 4962 + }, + { + "epoch": 1.03, + "learning_rate": 2.8633565421982566e-06, + "loss": 0.7948, + "step": 4963 + }, + { + "epoch": 1.03, + "learning_rate": 2.862374183473466e-06, + "loss": 0.7796, + "step": 4964 + }, + { + "epoch": 1.03, + "learning_rate": 2.86139183953656e-06, + "loss": 0.997, + "step": 4965 + }, + { + "epoch": 1.03, + "learning_rate": 2.8604095104930906e-06, + "loss": 0.731, + "step": 4966 + }, + { + "epoch": 1.03, + "learning_rate": 2.8594271964486103e-06, + "loss": 0.788, + "step": 4967 + }, + { + "epoch": 1.03, + "learning_rate": 2.8584448975086667e-06, + "loss": 0.9927, + "step": 4968 + }, + { + "epoch": 1.03, + "learning_rate": 2.8574626137788095e-06, + "loss": 1.0731, + "step": 4969 + }, + { + "epoch": 1.03, + "learning_rate": 2.8564803453645836e-06, + "loss": 0.9894, + "step": 4970 + }, + { + "epoch": 1.03, + "learning_rate": 2.855498092371535e-06, + "loss": 0.8738, + "step": 4971 + }, + { + "epoch": 1.03, + "learning_rate": 2.8545158549052055e-06, + "loss": 0.8113, + "step": 4972 + }, + { + "epoch": 1.03, + "learning_rate": 2.8535336330711374e-06, + "loss": 0.8419, + "step": 4973 + }, + { + "epoch": 1.03, + "learning_rate": 2.8525514269748695e-06, + "loss": 1.1289, + "step": 4974 + }, + { + "epoch": 1.03, + "learning_rate": 2.8515692367219407e-06, + "loss": 0.8231, + "step": 4975 + }, + { + "epoch": 1.03, + "learning_rate": 2.8505870624178864e-06, + "loss": 0.9375, + "step": 4976 + }, + { + "epoch": 1.04, + "learning_rate": 2.8496049041682426e-06, + "loss": 1.0668, + "step": 4977 + }, + { + "epoch": 1.04, + "learning_rate": 2.8486227620785398e-06, + "loss": 1.0179, + "step": 4978 + }, + { + "epoch": 1.04, + "learning_rate": 2.8476406362543116e-06, + "loss": 0.7766, + "step": 4979 + }, + { + "epoch": 1.04, + "learning_rate": 2.8466585268010847e-06, + "loss": 0.8783, + "step": 4980 + }, + { + "epoch": 1.04, + "learning_rate": 2.8456764338243893e-06, + "loss": 1.2013, + "step": 4981 + }, + { + "epoch": 1.04, + "learning_rate": 2.84469435742975e-06, + "loss": 0.8621, + "step": 4982 + }, + { + "epoch": 1.04, + "learning_rate": 2.8437122977226906e-06, + "loss": 0.9092, + "step": 4983 + }, + { + "epoch": 1.04, + "learning_rate": 2.8427302548087337e-06, + "loss": 1.038, + "step": 4984 + }, + { + "epoch": 1.04, + "learning_rate": 2.8417482287934e-06, + "loss": 0.8483, + "step": 4985 + }, + { + "epoch": 1.04, + "learning_rate": 2.8407662197822073e-06, + "loss": 0.86, + "step": 4986 + }, + { + "epoch": 1.04, + "learning_rate": 2.8397842278806737e-06, + "loss": 0.938, + "step": 4987 + }, + { + "epoch": 1.04, + "learning_rate": 2.838802253194312e-06, + "loss": 1.0393, + "step": 4988 + }, + { + "epoch": 1.04, + "learning_rate": 2.837820295828638e-06, + "loss": 0.9221, + "step": 4989 + }, + { + "epoch": 1.04, + "learning_rate": 2.8368383558891614e-06, + "loss": 0.9856, + "step": 4990 + }, + { + "epoch": 1.04, + "learning_rate": 2.8358564334813913e-06, + "loss": 0.819, + "step": 4991 + }, + { + "epoch": 1.04, + "learning_rate": 2.834874528710836e-06, + "loss": 0.9211, + "step": 4992 + }, + { + "epoch": 1.04, + "learning_rate": 2.833892641683001e-06, + "loss": 0.9553, + "step": 4993 + }, + { + "epoch": 1.04, + "learning_rate": 2.83291077250339e-06, + "loss": 0.7451, + "step": 4994 + }, + { + "epoch": 1.04, + "learning_rate": 2.8319289212775042e-06, + "loss": 0.7096, + "step": 4995 + }, + { + "epoch": 1.04, + "learning_rate": 2.830947088110843e-06, + "loss": 0.7362, + "step": 4996 + }, + { + "epoch": 1.04, + "learning_rate": 2.8299652731089066e-06, + "loss": 0.9312, + "step": 4997 + }, + { + "epoch": 1.04, + "learning_rate": 2.8289834763771894e-06, + "loss": 1.1367, + "step": 4998 + }, + { + "epoch": 1.04, + "learning_rate": 2.828001698021184e-06, + "loss": 0.8969, + "step": 4999 + }, + { + "epoch": 1.04, + "learning_rate": 2.8270199381463846e-06, + "loss": 0.7711, + "step": 5000 + }, + { + "epoch": 1.04, + "eval_loss": NaN, + "eval_runtime": 15.0679, + "eval_samples_per_second": 351.41, + "eval_steps_per_second": 43.935, + "step": 5000 + }, + { + "epoch": 1.04, + "learning_rate": 2.826038196858281e-06, + "loss": 0.7925, + "step": 5001 + }, + { + "epoch": 1.04, + "learning_rate": 2.825056474262361e-06, + "loss": 0.8544, + "step": 5002 + }, + { + "epoch": 1.04, + "learning_rate": 2.824074770464109e-06, + "loss": 0.8577, + "step": 5003 + }, + { + "epoch": 1.04, + "learning_rate": 2.823093085569011e-06, + "loss": 1.1027, + "step": 5004 + }, + { + "epoch": 1.04, + "learning_rate": 2.8221114196825485e-06, + "loss": 0.868, + "step": 5005 + }, + { + "epoch": 1.04, + "learning_rate": 2.8211297729102017e-06, + "loss": 0.7795, + "step": 5006 + }, + { + "epoch": 1.04, + "learning_rate": 2.8201481453574474e-06, + "loss": 0.9592, + "step": 5007 + }, + { + "epoch": 1.04, + "learning_rate": 2.8191665371297617e-06, + "loss": 1.1029, + "step": 5008 + }, + { + "epoch": 1.04, + "learning_rate": 2.8181849483326197e-06, + "loss": 0.7897, + "step": 5009 + }, + { + "epoch": 1.04, + "learning_rate": 2.8172033790714917e-06, + "loss": 0.7872, + "step": 5010 + }, + { + "epoch": 1.04, + "learning_rate": 2.816221829451847e-06, + "loss": 0.8785, + "step": 5011 + }, + { + "epoch": 1.04, + "learning_rate": 2.8152402995791536e-06, + "loss": 0.9566, + "step": 5012 + }, + { + "epoch": 1.04, + "learning_rate": 2.8142587895588776e-06, + "loss": 1.1451, + "step": 5013 + }, + { + "epoch": 1.04, + "learning_rate": 2.8132772994964813e-06, + "loss": 0.8867, + "step": 5014 + }, + { + "epoch": 1.04, + "learning_rate": 2.8122958294974254e-06, + "loss": 0.9566, + "step": 5015 + }, + { + "epoch": 1.04, + "learning_rate": 2.8113143796671687e-06, + "loss": 0.8351, + "step": 5016 + }, + { + "epoch": 1.04, + "learning_rate": 2.8103329501111696e-06, + "loss": 1.1455, + "step": 5017 + }, + { + "epoch": 1.04, + "learning_rate": 2.809351540934881e-06, + "loss": 0.8174, + "step": 5018 + }, + { + "epoch": 1.04, + "learning_rate": 2.8083701522437556e-06, + "loss": 0.7852, + "step": 5019 + }, + { + "epoch": 1.04, + "learning_rate": 2.807388784143244e-06, + "loss": 0.8544, + "step": 5020 + }, + { + "epoch": 1.04, + "learning_rate": 2.8064074367387934e-06, + "loss": 0.9393, + "step": 5021 + }, + { + "epoch": 1.04, + "learning_rate": 2.8054261101358504e-06, + "loss": 1.1143, + "step": 5022 + }, + { + "epoch": 1.04, + "learning_rate": 2.8044448044398575e-06, + "loss": 0.9109, + "step": 5023 + }, + { + "epoch": 1.04, + "learning_rate": 2.8034635197562563e-06, + "loss": 0.9565, + "step": 5024 + }, + { + "epoch": 1.05, + "learning_rate": 2.802482256190486e-06, + "loss": 1.0175, + "step": 5025 + }, + { + "epoch": 1.05, + "learning_rate": 2.801501013847984e-06, + "loss": 1.0275, + "step": 5026 + }, + { + "epoch": 1.05, + "learning_rate": 2.8005197928341825e-06, + "loss": 1.1145, + "step": 5027 + }, + { + "epoch": 1.05, + "learning_rate": 2.799538593254516e-06, + "loss": 0.843, + "step": 5028 + }, + { + "epoch": 1.05, + "learning_rate": 2.7985574152144132e-06, + "loss": 0.6934, + "step": 5029 + }, + { + "epoch": 1.05, + "learning_rate": 2.797576258819302e-06, + "loss": 0.924, + "step": 5030 + }, + { + "epoch": 1.05, + "learning_rate": 2.7965951241746062e-06, + "loss": 1.0669, + "step": 5031 + }, + { + "epoch": 1.05, + "learning_rate": 2.7956140113857507e-06, + "loss": 0.6598, + "step": 5032 + }, + { + "epoch": 1.05, + "learning_rate": 2.7946329205581556e-06, + "loss": 0.7969, + "step": 5033 + }, + { + "epoch": 1.05, + "learning_rate": 2.793651851797238e-06, + "loss": 1.1766, + "step": 5034 + }, + { + "epoch": 1.05, + "learning_rate": 2.792670805208413e-06, + "loss": 0.9353, + "step": 5035 + }, + { + "epoch": 1.05, + "learning_rate": 2.791689780897097e-06, + "loss": 0.8661, + "step": 5036 + }, + { + "epoch": 1.05, + "learning_rate": 2.7907087789686987e-06, + "loss": 0.8104, + "step": 5037 + }, + { + "epoch": 1.05, + "learning_rate": 2.7897277995286272e-06, + "loss": 0.9052, + "step": 5038 + }, + { + "epoch": 1.05, + "learning_rate": 2.7887468426822875e-06, + "loss": 0.991, + "step": 5039 + }, + { + "epoch": 1.05, + "learning_rate": 2.7877659085350858e-06, + "loss": 0.9302, + "step": 5040 + }, + { + "epoch": 1.05, + "learning_rate": 2.786784997192422e-06, + "loss": 1.0034, + "step": 5041 + }, + { + "epoch": 1.05, + "learning_rate": 2.7858041087596947e-06, + "loss": 0.7973, + "step": 5042 + }, + { + "epoch": 1.05, + "learning_rate": 2.7848232433423e-06, + "loss": 0.9125, + "step": 5043 + }, + { + "epoch": 1.05, + "learning_rate": 2.783842401045633e-06, + "loss": 0.9479, + "step": 5044 + }, + { + "epoch": 1.05, + "learning_rate": 2.782861581975085e-06, + "loss": 0.728, + "step": 5045 + }, + { + "epoch": 1.05, + "learning_rate": 2.7818807862360448e-06, + "loss": 0.9541, + "step": 5046 + }, + { + "epoch": 1.05, + "learning_rate": 2.780900013933897e-06, + "loss": 0.8156, + "step": 5047 + }, + { + "epoch": 1.05, + "learning_rate": 2.7799192651740286e-06, + "loss": 0.8869, + "step": 5048 + }, + { + "epoch": 1.05, + "learning_rate": 2.778938540061819e-06, + "loss": 0.9201, + "step": 5049 + }, + { + "epoch": 1.05, + "learning_rate": 2.7779578387026474e-06, + "loss": 0.8351, + "step": 5050 + }, + { + "epoch": 1.05, + "learning_rate": 2.7769771612018894e-06, + "loss": 1.0088, + "step": 5051 + }, + { + "epoch": 1.05, + "learning_rate": 2.7759965076649203e-06, + "loss": 0.8211, + "step": 5052 + }, + { + "epoch": 1.05, + "learning_rate": 2.775015878197111e-06, + "loss": 0.7549, + "step": 5053 + }, + { + "epoch": 1.05, + "learning_rate": 2.774035272903829e-06, + "loss": 0.7762, + "step": 5054 + }, + { + "epoch": 1.05, + "learning_rate": 2.77305469189044e-06, + "loss": 0.9624, + "step": 5055 + }, + { + "epoch": 1.05, + "learning_rate": 2.772074135262309e-06, + "loss": 0.9319, + "step": 5056 + }, + { + "epoch": 1.05, + "learning_rate": 2.7710936031247966e-06, + "loss": 0.9544, + "step": 5057 + }, + { + "epoch": 1.05, + "learning_rate": 2.7701130955832594e-06, + "loss": 1.2526, + "step": 5058 + }, + { + "epoch": 1.05, + "learning_rate": 2.769132612743053e-06, + "loss": 1.1122, + "step": 5059 + }, + { + "epoch": 1.05, + "learning_rate": 2.768152154709532e-06, + "loss": 0.7325, + "step": 5060 + }, + { + "epoch": 1.05, + "learning_rate": 2.767171721588045e-06, + "loss": 0.9479, + "step": 5061 + }, + { + "epoch": 1.05, + "learning_rate": 2.7661913134839404e-06, + "loss": 0.9745, + "step": 5062 + }, + { + "epoch": 1.05, + "learning_rate": 2.765210930502561e-06, + "loss": 0.9945, + "step": 5063 + }, + { + "epoch": 1.05, + "learning_rate": 2.7642305727492516e-06, + "loss": 1.0837, + "step": 5064 + }, + { + "epoch": 1.05, + "learning_rate": 2.7632502403293503e-06, + "loss": 0.8235, + "step": 5065 + }, + { + "epoch": 1.05, + "learning_rate": 2.762269933348194e-06, + "loss": 0.7867, + "step": 5066 + }, + { + "epoch": 1.05, + "learning_rate": 2.7612896519111148e-06, + "loss": 0.9705, + "step": 5067 + }, + { + "epoch": 1.05, + "learning_rate": 2.7603093961234473e-06, + "loss": 0.8995, + "step": 5068 + }, + { + "epoch": 1.05, + "learning_rate": 2.7593291660905175e-06, + "loss": 0.9968, + "step": 5069 + }, + { + "epoch": 1.05, + "learning_rate": 2.758348961917652e-06, + "loss": 0.9433, + "step": 5070 + }, + { + "epoch": 1.05, + "learning_rate": 2.757368783710172e-06, + "loss": 1.2072, + "step": 5071 + }, + { + "epoch": 1.05, + "learning_rate": 2.7563886315734e-06, + "loss": 1.1102, + "step": 5072 + }, + { + "epoch": 1.06, + "learning_rate": 2.7554085056126523e-06, + "loss": 0.9284, + "step": 5073 + }, + { + "epoch": 1.06, + "learning_rate": 2.754428405933243e-06, + "loss": 0.8762, + "step": 5074 + }, + { + "epoch": 1.06, + "learning_rate": 2.753448332640483e-06, + "loss": 0.9954, + "step": 5075 + }, + { + "epoch": 1.06, + "learning_rate": 2.752468285839683e-06, + "loss": 0.977, + "step": 5076 + }, + { + "epoch": 1.06, + "learning_rate": 2.7514882656361484e-06, + "loss": 0.9597, + "step": 5077 + }, + { + "epoch": 1.06, + "learning_rate": 2.7505082721351815e-06, + "loss": 1.026, + "step": 5078 + }, + { + "epoch": 1.06, + "learning_rate": 2.749528305442082e-06, + "loss": 1.0157, + "step": 5079 + }, + { + "epoch": 1.06, + "learning_rate": 2.748548365662149e-06, + "loss": 0.9687, + "step": 5080 + }, + { + "epoch": 1.06, + "learning_rate": 2.747568452900677e-06, + "loss": 0.863, + "step": 5081 + }, + { + "epoch": 1.06, + "learning_rate": 2.746588567262956e-06, + "loss": 0.8272, + "step": 5082 + }, + { + "epoch": 1.06, + "learning_rate": 2.745608708854274e-06, + "loss": 0.737, + "step": 5083 + }, + { + "epoch": 1.06, + "learning_rate": 2.74462887777992e-06, + "loss": 0.9075, + "step": 5084 + }, + { + "epoch": 1.06, + "learning_rate": 2.7436490741451745e-06, + "loss": 0.913, + "step": 5085 + }, + { + "epoch": 1.06, + "learning_rate": 2.742669298055318e-06, + "loss": 0.8773, + "step": 5086 + }, + { + "epoch": 1.06, + "learning_rate": 2.7416895496156253e-06, + "loss": 0.8703, + "step": 5087 + }, + { + "epoch": 1.06, + "learning_rate": 2.7407098289313737e-06, + "loss": 0.9843, + "step": 5088 + }, + { + "epoch": 1.06, + "learning_rate": 2.7397301361078322e-06, + "loss": 0.6712, + "step": 5089 + }, + { + "epoch": 1.06, + "learning_rate": 2.7387504712502694e-06, + "loss": 1.0755, + "step": 5090 + }, + { + "epoch": 1.06, + "learning_rate": 2.737770834463948e-06, + "loss": 0.7956, + "step": 5091 + }, + { + "epoch": 1.06, + "learning_rate": 2.7367912258541332e-06, + "loss": 0.9749, + "step": 5092 + }, + { + "epoch": 1.06, + "learning_rate": 2.7358116455260828e-06, + "loss": 0.847, + "step": 5093 + }, + { + "epoch": 1.06, + "learning_rate": 2.7348320935850506e-06, + "loss": 0.922, + "step": 5094 + }, + { + "epoch": 1.06, + "learning_rate": 2.733852570136292e-06, + "loss": 1.0573, + "step": 5095 + }, + { + "epoch": 1.06, + "learning_rate": 2.732873075285056e-06, + "loss": 0.9509, + "step": 5096 + }, + { + "epoch": 1.06, + "learning_rate": 2.7318936091365896e-06, + "loss": 0.9445, + "step": 5097 + }, + { + "epoch": 1.06, + "learning_rate": 2.7309141717961337e-06, + "loss": 1.0337, + "step": 5098 + }, + { + "epoch": 1.06, + "learning_rate": 2.7299347633689327e-06, + "loss": 0.9722, + "step": 5099 + }, + { + "epoch": 1.06, + "learning_rate": 2.7289553839602216e-06, + "loss": 0.983, + "step": 5100 + }, + { + "epoch": 1.06, + "learning_rate": 2.7279760336752354e-06, + "loss": 0.8549, + "step": 5101 + }, + { + "epoch": 1.06, + "learning_rate": 2.7269967126192038e-06, + "loss": 0.9359, + "step": 5102 + }, + { + "epoch": 1.06, + "learning_rate": 2.7260174208973568e-06, + "loss": 0.9654, + "step": 5103 + }, + { + "epoch": 1.06, + "learning_rate": 2.7250381586149187e-06, + "loss": 0.9362, + "step": 5104 + }, + { + "epoch": 1.06, + "learning_rate": 2.724058925877111e-06, + "loss": 1.0239, + "step": 5105 + }, + { + "epoch": 1.06, + "learning_rate": 2.7230797227891507e-06, + "loss": 0.9569, + "step": 5106 + }, + { + "epoch": 1.06, + "learning_rate": 2.722100549456255e-06, + "loss": 0.9636, + "step": 5107 + }, + { + "epoch": 1.06, + "learning_rate": 2.721121405983636e-06, + "loss": 0.7328, + "step": 5108 + }, + { + "epoch": 1.06, + "learning_rate": 2.7201422924765023e-06, + "loss": 1.0647, + "step": 5109 + }, + { + "epoch": 1.06, + "learning_rate": 2.719163209040058e-06, + "loss": 0.7425, + "step": 5110 + }, + { + "epoch": 1.06, + "learning_rate": 2.7181841557795074e-06, + "loss": 0.9469, + "step": 5111 + }, + { + "epoch": 1.06, + "learning_rate": 2.7172051328000497e-06, + "loss": 0.9559, + "step": 5112 + }, + { + "epoch": 1.06, + "learning_rate": 2.71622614020688e-06, + "loss": 0.9677, + "step": 5113 + }, + { + "epoch": 1.06, + "learning_rate": 2.71524717810519e-06, + "loss": 1.0308, + "step": 5114 + }, + { + "epoch": 1.06, + "learning_rate": 2.714268246600172e-06, + "loss": 0.7548, + "step": 5115 + }, + { + "epoch": 1.06, + "learning_rate": 2.7132893457970107e-06, + "loss": 0.8815, + "step": 5116 + }, + { + "epoch": 1.06, + "learning_rate": 2.7123104758008885e-06, + "loss": 0.9685, + "step": 5117 + }, + { + "epoch": 1.06, + "learning_rate": 2.7113316367169835e-06, + "loss": 0.7969, + "step": 5118 + }, + { + "epoch": 1.06, + "learning_rate": 2.710352828650475e-06, + "loss": 0.7693, + "step": 5119 + }, + { + "epoch": 1.06, + "learning_rate": 2.709374051706535e-06, + "loss": 0.8285, + "step": 5120 + }, + { + "epoch": 1.07, + "learning_rate": 2.7083953059903314e-06, + "loss": 0.9948, + "step": 5121 + }, + { + "epoch": 1.07, + "learning_rate": 2.707416591607031e-06, + "loss": 0.7371, + "step": 5122 + }, + { + "epoch": 1.07, + "learning_rate": 2.7064379086617978e-06, + "loss": 0.7764, + "step": 5123 + }, + { + "epoch": 1.07, + "learning_rate": 2.70545925725979e-06, + "loss": 0.7368, + "step": 5124 + }, + { + "epoch": 1.07, + "learning_rate": 2.7044806375061645e-06, + "loss": 0.7218, + "step": 5125 + }, + { + "epoch": 1.07, + "learning_rate": 2.7035020495060723e-06, + "loss": 0.8972, + "step": 5126 + }, + { + "epoch": 1.07, + "learning_rate": 2.7025234933646646e-06, + "loss": 0.8921, + "step": 5127 + }, + { + "epoch": 1.07, + "learning_rate": 2.7015449691870866e-06, + "loss": 0.8893, + "step": 5128 + }, + { + "epoch": 1.07, + "learning_rate": 2.70056647707848e-06, + "loss": 0.9701, + "step": 5129 + }, + { + "epoch": 1.07, + "learning_rate": 2.699588017143983e-06, + "loss": 0.8893, + "step": 5130 + }, + { + "epoch": 1.07, + "learning_rate": 2.698609589488733e-06, + "loss": 1.071, + "step": 5131 + }, + { + "epoch": 1.07, + "learning_rate": 2.697631194217862e-06, + "loss": 0.8426, + "step": 5132 + }, + { + "epoch": 1.07, + "learning_rate": 2.6966528314364973e-06, + "loss": 0.8375, + "step": 5133 + }, + { + "epoch": 1.07, + "learning_rate": 2.6956745012497625e-06, + "loss": 0.7207, + "step": 5134 + }, + { + "epoch": 1.07, + "learning_rate": 2.694696203762782e-06, + "loss": 0.7397, + "step": 5135 + }, + { + "epoch": 1.07, + "learning_rate": 2.6937179390806732e-06, + "loss": 0.972, + "step": 5136 + }, + { + "epoch": 1.07, + "learning_rate": 2.6927397073085498e-06, + "loss": 0.8002, + "step": 5137 + }, + { + "epoch": 1.07, + "learning_rate": 2.6917615085515213e-06, + "loss": 1.143, + "step": 5138 + }, + { + "epoch": 1.07, + "learning_rate": 2.6907833429146982e-06, + "loss": 0.9981, + "step": 5139 + }, + { + "epoch": 1.07, + "learning_rate": 2.689805210503183e-06, + "loss": 0.8833, + "step": 5140 + }, + { + "epoch": 1.07, + "learning_rate": 2.688827111422076e-06, + "loss": 0.7883, + "step": 5141 + }, + { + "epoch": 1.07, + "learning_rate": 2.687849045776472e-06, + "loss": 0.886, + "step": 5142 + }, + { + "epoch": 1.07, + "learning_rate": 2.686871013671467e-06, + "loss": 0.9139, + "step": 5143 + }, + { + "epoch": 1.07, + "learning_rate": 2.6858930152121497e-06, + "loss": 1.0063, + "step": 5144 + }, + { + "epoch": 1.07, + "learning_rate": 2.684915050503605e-06, + "loss": 0.8107, + "step": 5145 + }, + { + "epoch": 1.07, + "learning_rate": 2.6839371196509147e-06, + "loss": 0.8096, + "step": 5146 + }, + { + "epoch": 1.07, + "learning_rate": 2.682959222759159e-06, + "loss": 0.9493, + "step": 5147 + }, + { + "epoch": 1.07, + "learning_rate": 2.6819813599334127e-06, + "loss": 0.7603, + "step": 5148 + }, + { + "epoch": 1.07, + "learning_rate": 2.6810035312787467e-06, + "loss": 0.8316, + "step": 5149 + }, + { + "epoch": 1.07, + "learning_rate": 2.680025736900227e-06, + "loss": 0.7575, + "step": 5150 + }, + { + "epoch": 1.07, + "learning_rate": 2.6790479769029207e-06, + "loss": 0.8597, + "step": 5151 + }, + { + "epoch": 1.07, + "learning_rate": 2.6780702513918857e-06, + "loss": 0.8572, + "step": 5152 + }, + { + "epoch": 1.07, + "learning_rate": 2.6770925604721796e-06, + "loss": 1.0721, + "step": 5153 + }, + { + "epoch": 1.07, + "learning_rate": 2.6761149042488536e-06, + "loss": 0.9597, + "step": 5154 + }, + { + "epoch": 1.07, + "learning_rate": 2.675137282826959e-06, + "loss": 0.9718, + "step": 5155 + }, + { + "epoch": 1.07, + "learning_rate": 2.6741596963115405e-06, + "loss": 0.9384, + "step": 5156 + }, + { + "epoch": 1.07, + "learning_rate": 2.673182144807638e-06, + "loss": 0.8398, + "step": 5157 + }, + { + "epoch": 1.07, + "learning_rate": 2.672204628420292e-06, + "loss": 1.0086, + "step": 5158 + }, + { + "epoch": 1.07, + "learning_rate": 2.6712271472545354e-06, + "loss": 0.8579, + "step": 5159 + }, + { + "epoch": 1.07, + "learning_rate": 2.6702497014153986e-06, + "loss": 1.0966, + "step": 5160 + }, + { + "epoch": 1.07, + "learning_rate": 2.6692722910079058e-06, + "loss": 0.8094, + "step": 5161 + }, + { + "epoch": 1.07, + "learning_rate": 2.6682949161370835e-06, + "loss": 0.8408, + "step": 5162 + }, + { + "epoch": 1.07, + "learning_rate": 2.6673175769079487e-06, + "loss": 0.7665, + "step": 5163 + }, + { + "epoch": 1.07, + "learning_rate": 2.666340273425517e-06, + "loss": 0.8694, + "step": 5164 + }, + { + "epoch": 1.07, + "learning_rate": 2.6653630057947967e-06, + "loss": 0.8964, + "step": 5165 + }, + { + "epoch": 1.07, + "learning_rate": 2.6643857741207997e-06, + "loss": 0.8647, + "step": 5166 + }, + { + "epoch": 1.07, + "learning_rate": 2.663408578508527e-06, + "loss": 1.179, + "step": 5167 + }, + { + "epoch": 1.07, + "learning_rate": 2.662431419062979e-06, + "loss": 0.9626, + "step": 5168 + }, + { + "epoch": 1.08, + "learning_rate": 2.6614542958891493e-06, + "loss": 0.975, + "step": 5169 + }, + { + "epoch": 1.08, + "learning_rate": 2.6604772090920322e-06, + "loss": 0.9602, + "step": 5170 + }, + { + "epoch": 1.08, + "learning_rate": 2.6595001587766153e-06, + "loss": 0.8227, + "step": 5171 + }, + { + "epoch": 1.08, + "learning_rate": 2.658523145047882e-06, + "loss": 1.0961, + "step": 5172 + }, + { + "epoch": 1.08, + "learning_rate": 2.6575461680108113e-06, + "loss": 0.8232, + "step": 5173 + }, + { + "epoch": 1.08, + "learning_rate": 2.656569227770381e-06, + "loss": 0.9709, + "step": 5174 + }, + { + "epoch": 1.08, + "learning_rate": 2.655592324431563e-06, + "loss": 0.776, + "step": 5175 + }, + { + "epoch": 1.08, + "learning_rate": 2.6546154580993255e-06, + "loss": 0.9439, + "step": 5176 + }, + { + "epoch": 1.08, + "learning_rate": 2.653638628878631e-06, + "loss": 0.8315, + "step": 5177 + }, + { + "epoch": 1.08, + "learning_rate": 2.652661836874442e-06, + "loss": 0.8479, + "step": 5178 + }, + { + "epoch": 1.08, + "learning_rate": 2.6516850821917143e-06, + "loss": 0.8588, + "step": 5179 + }, + { + "epoch": 1.08, + "learning_rate": 2.6507083649353994e-06, + "loss": 0.8504, + "step": 5180 + }, + { + "epoch": 1.08, + "learning_rate": 2.6497316852104448e-06, + "loss": 0.7226, + "step": 5181 + }, + { + "epoch": 1.08, + "learning_rate": 2.6487550431217966e-06, + "loss": 0.8435, + "step": 5182 + }, + { + "epoch": 1.08, + "learning_rate": 2.647778438774394e-06, + "loss": 1.0032, + "step": 5183 + }, + { + "epoch": 1.08, + "learning_rate": 2.646801872273173e-06, + "loss": 1.0002, + "step": 5184 + }, + { + "epoch": 1.08, + "learning_rate": 2.6458253437230645e-06, + "loss": 0.9403, + "step": 5185 + }, + { + "epoch": 1.08, + "learning_rate": 2.6448488532289985e-06, + "loss": 0.8692, + "step": 5186 + }, + { + "epoch": 1.08, + "learning_rate": 2.6438724008958983e-06, + "loss": 0.8035, + "step": 5187 + }, + { + "epoch": 1.08, + "learning_rate": 2.642895986828683e-06, + "loss": 1.0486, + "step": 5188 + }, + { + "epoch": 1.08, + "learning_rate": 2.641919611132267e-06, + "loss": 0.7863, + "step": 5189 + }, + { + "epoch": 1.08, + "learning_rate": 2.6409432739115643e-06, + "loss": 1.2217, + "step": 5190 + }, + { + "epoch": 1.08, + "learning_rate": 2.639966975271482e-06, + "loss": 0.8663, + "step": 5191 + }, + { + "epoch": 1.08, + "learning_rate": 2.638990715316922e-06, + "loss": 1.0437, + "step": 5192 + }, + { + "epoch": 1.08, + "learning_rate": 2.6380144941527826e-06, + "loss": 0.8771, + "step": 5193 + }, + { + "epoch": 1.08, + "learning_rate": 2.6370383118839615e-06, + "loss": 0.827, + "step": 5194 + }, + { + "epoch": 1.08, + "learning_rate": 2.6360621686153475e-06, + "loss": 0.9614, + "step": 5195 + }, + { + "epoch": 1.08, + "learning_rate": 2.6350860644518275e-06, + "loss": 0.897, + "step": 5196 + }, + { + "epoch": 1.08, + "learning_rate": 2.6341099994982827e-06, + "loss": 0.9184, + "step": 5197 + }, + { + "epoch": 1.08, + "learning_rate": 2.633133973859594e-06, + "loss": 0.7468, + "step": 5198 + }, + { + "epoch": 1.08, + "learning_rate": 2.632157987640633e-06, + "loss": 0.8127, + "step": 5199 + }, + { + "epoch": 1.08, + "learning_rate": 2.6311820409462704e-06, + "loss": 0.8456, + "step": 5200 + }, + { + "epoch": 1.08, + "learning_rate": 2.63020613388137e-06, + "loss": 0.9151, + "step": 5201 + }, + { + "epoch": 1.08, + "learning_rate": 2.629230266550795e-06, + "loss": 1.0365, + "step": 5202 + }, + { + "epoch": 1.08, + "learning_rate": 2.628254439059402e-06, + "loss": 0.8518, + "step": 5203 + }, + { + "epoch": 1.08, + "learning_rate": 2.6272786515120424e-06, + "loss": 0.9852, + "step": 5204 + }, + { + "epoch": 1.08, + "learning_rate": 2.626302904013564e-06, + "loss": 0.7815, + "step": 5205 + }, + { + "epoch": 1.08, + "learning_rate": 2.625327196668813e-06, + "loss": 0.9495, + "step": 5206 + }, + { + "epoch": 1.08, + "learning_rate": 2.6243515295826283e-06, + "loss": 0.9349, + "step": 5207 + }, + { + "epoch": 1.08, + "learning_rate": 2.623375902859845e-06, + "loss": 0.8513, + "step": 5208 + }, + { + "epoch": 1.08, + "learning_rate": 2.6224003166052922e-06, + "loss": 0.8394, + "step": 5209 + }, + { + "epoch": 1.08, + "learning_rate": 2.6214247709238007e-06, + "loss": 0.9631, + "step": 5210 + }, + { + "epoch": 1.08, + "learning_rate": 2.6204492659201897e-06, + "loss": 0.8531, + "step": 5211 + }, + { + "epoch": 1.08, + "learning_rate": 2.619473801699278e-06, + "loss": 1.0087, + "step": 5212 + }, + { + "epoch": 1.08, + "learning_rate": 2.6184983783658783e-06, + "loss": 0.7728, + "step": 5213 + }, + { + "epoch": 1.08, + "learning_rate": 2.617522996024802e-06, + "loss": 0.8124, + "step": 5214 + }, + { + "epoch": 1.08, + "learning_rate": 2.6165476547808523e-06, + "loss": 0.8797, + "step": 5215 + }, + { + "epoch": 1.08, + "learning_rate": 2.6155723547388294e-06, + "loss": 0.8628, + "step": 5216 + }, + { + "epoch": 1.09, + "learning_rate": 2.6145970960035287e-06, + "loss": 0.7396, + "step": 5217 + }, + { + "epoch": 1.09, + "learning_rate": 2.6136218786797437e-06, + "loss": 0.7556, + "step": 5218 + }, + { + "epoch": 1.09, + "learning_rate": 2.61264670287226e-06, + "loss": 0.8182, + "step": 5219 + }, + { + "epoch": 1.09, + "learning_rate": 2.6116715686858605e-06, + "loss": 0.7876, + "step": 5220 + }, + { + "epoch": 1.09, + "learning_rate": 2.6106964762253224e-06, + "loss": 0.9361, + "step": 5221 + }, + { + "epoch": 1.09, + "learning_rate": 2.609721425595421e-06, + "loss": 0.936, + "step": 5222 + }, + { + "epoch": 1.09, + "learning_rate": 2.608746416900925e-06, + "loss": 0.9006, + "step": 5223 + }, + { + "epoch": 1.09, + "learning_rate": 2.6077714502465974e-06, + "loss": 0.7237, + "step": 5224 + }, + { + "epoch": 1.09, + "learning_rate": 2.6067965257371996e-06, + "loss": 0.9579, + "step": 5225 + }, + { + "epoch": 1.09, + "learning_rate": 2.6058216434774877e-06, + "loss": 0.7425, + "step": 5226 + }, + { + "epoch": 1.09, + "learning_rate": 2.6048468035722123e-06, + "loss": 0.9062, + "step": 5227 + }, + { + "epoch": 1.09, + "learning_rate": 2.6038720061261186e-06, + "loss": 1.0381, + "step": 5228 + }, + { + "epoch": 1.09, + "learning_rate": 2.6028972512439505e-06, + "loss": 1.0303, + "step": 5229 + }, + { + "epoch": 1.09, + "learning_rate": 2.6019225390304438e-06, + "loss": 0.6919, + "step": 5230 + }, + { + "epoch": 1.09, + "learning_rate": 2.600947869590333e-06, + "loss": 0.923, + "step": 5231 + }, + { + "epoch": 1.09, + "learning_rate": 2.599973243028344e-06, + "loss": 0.7918, + "step": 5232 + }, + { + "epoch": 1.09, + "learning_rate": 2.5989986594492014e-06, + "loss": 0.9626, + "step": 5233 + }, + { + "epoch": 1.09, + "learning_rate": 2.598024118957625e-06, + "loss": 0.8959, + "step": 5234 + }, + { + "epoch": 1.09, + "learning_rate": 2.597049621658328e-06, + "loss": 0.7392, + "step": 5235 + }, + { + "epoch": 1.09, + "learning_rate": 2.59607516765602e-06, + "loss": 0.9978, + "step": 5236 + }, + { + "epoch": 1.09, + "learning_rate": 2.5951007570554066e-06, + "loss": 0.7804, + "step": 5237 + }, + { + "epoch": 1.09, + "learning_rate": 2.5941263899611886e-06, + "loss": 1.0637, + "step": 5238 + }, + { + "epoch": 1.09, + "learning_rate": 2.5931520664780607e-06, + "loss": 0.9909, + "step": 5239 + }, + { + "epoch": 1.09, + "learning_rate": 2.592177786710714e-06, + "loss": 1.0615, + "step": 5240 + }, + { + "epoch": 1.09, + "learning_rate": 2.5912035507638346e-06, + "loss": 0.8195, + "step": 5241 + }, + { + "epoch": 1.09, + "learning_rate": 2.5902293587421057e-06, + "loss": 0.8515, + "step": 5242 + }, + { + "epoch": 1.09, + "learning_rate": 2.5892552107502026e-06, + "loss": 0.8845, + "step": 5243 + }, + { + "epoch": 1.09, + "learning_rate": 2.588281106892797e-06, + "loss": 0.9361, + "step": 5244 + }, + { + "epoch": 1.09, + "learning_rate": 2.5873070472745573e-06, + "loss": 0.8893, + "step": 5245 + }, + { + "epoch": 1.09, + "learning_rate": 2.5863330320001466e-06, + "loss": 0.9266, + "step": 5246 + }, + { + "epoch": 1.09, + "learning_rate": 2.585359061174222e-06, + "loss": 0.8823, + "step": 5247 + }, + { + "epoch": 1.09, + "learning_rate": 2.584385134901436e-06, + "loss": 0.8765, + "step": 5248 + }, + { + "epoch": 1.09, + "learning_rate": 2.5834112532864375e-06, + "loss": 0.95, + "step": 5249 + }, + { + "epoch": 1.09, + "learning_rate": 2.5824374164338713e-06, + "loss": 0.7734, + "step": 5250 + }, + { + "epoch": 1.09, + "learning_rate": 2.5814636244483743e-06, + "loss": 1.1288, + "step": 5251 + }, + { + "epoch": 1.09, + "learning_rate": 2.5804898774345805e-06, + "loss": 0.8314, + "step": 5252 + }, + { + "epoch": 1.09, + "learning_rate": 2.5795161754971193e-06, + "loss": 0.9537, + "step": 5253 + }, + { + "epoch": 1.09, + "learning_rate": 2.578542518740616e-06, + "loss": 1.01, + "step": 5254 + }, + { + "epoch": 1.09, + "learning_rate": 2.577568907269689e-06, + "loss": 1.0062, + "step": 5255 + }, + { + "epoch": 1.09, + "learning_rate": 2.5765953411889523e-06, + "loss": 0.8384, + "step": 5256 + }, + { + "epoch": 1.09, + "learning_rate": 2.5756218206030154e-06, + "loss": 1.0225, + "step": 5257 + }, + { + "epoch": 1.09, + "learning_rate": 2.5746483456164844e-06, + "loss": 1.1161, + "step": 5258 + }, + { + "epoch": 1.09, + "learning_rate": 2.5736749163339585e-06, + "loss": 0.7405, + "step": 5259 + }, + { + "epoch": 1.09, + "learning_rate": 2.572701532860032e-06, + "loss": 0.8706, + "step": 5260 + }, + { + "epoch": 1.09, + "learning_rate": 2.5717281952992955e-06, + "loss": 0.8597, + "step": 5261 + }, + { + "epoch": 1.09, + "learning_rate": 2.5707549037563335e-06, + "loss": 0.9166, + "step": 5262 + }, + { + "epoch": 1.09, + "learning_rate": 2.5697816583357267e-06, + "loss": 0.9452, + "step": 5263 + }, + { + "epoch": 1.09, + "learning_rate": 2.5688084591420497e-06, + "loss": 0.8585, + "step": 5264 + }, + { + "epoch": 1.1, + "learning_rate": 2.5678353062798734e-06, + "loss": 0.8866, + "step": 5265 + }, + { + "epoch": 1.1, + "learning_rate": 2.566862199853762e-06, + "loss": 0.8755, + "step": 5266 + }, + { + "epoch": 1.1, + "learning_rate": 2.565889139968277e-06, + "loss": 0.9105, + "step": 5267 + }, + { + "epoch": 1.1, + "learning_rate": 2.5649161267279728e-06, + "loss": 0.9641, + "step": 5268 + }, + { + "epoch": 1.1, + "learning_rate": 2.5639431602374e-06, + "loss": 0.8825, + "step": 5269 + }, + { + "epoch": 1.1, + "learning_rate": 2.5629702406011023e-06, + "loss": 1.1768, + "step": 5270 + }, + { + "epoch": 1.1, + "learning_rate": 2.5619973679236226e-06, + "loss": 0.8002, + "step": 5271 + }, + { + "epoch": 1.1, + "learning_rate": 2.5610245423094936e-06, + "loss": 1.0945, + "step": 5272 + }, + { + "epoch": 1.1, + "learning_rate": 2.5600517638632463e-06, + "loss": 0.987, + "step": 5273 + }, + { + "epoch": 1.1, + "learning_rate": 2.5590790326894056e-06, + "loss": 1.0344, + "step": 5274 + }, + { + "epoch": 1.1, + "learning_rate": 2.5581063488924914e-06, + "loss": 1.0447, + "step": 5275 + }, + { + "epoch": 1.1, + "learning_rate": 2.5571337125770183e-06, + "loss": 0.9885, + "step": 5276 + }, + { + "epoch": 1.1, + "learning_rate": 2.5561611238474966e-06, + "loss": 0.9099, + "step": 5277 + }, + { + "epoch": 1.1, + "learning_rate": 2.5551885828084303e-06, + "loss": 0.9503, + "step": 5278 + }, + { + "epoch": 1.1, + "learning_rate": 2.554216089564319e-06, + "loss": 0.7687, + "step": 5279 + }, + { + "epoch": 1.1, + "learning_rate": 2.553243644219657e-06, + "loss": 0.873, + "step": 5280 + }, + { + "epoch": 1.1, + "learning_rate": 2.5522712468789337e-06, + "loss": 0.7234, + "step": 5281 + }, + { + "epoch": 1.1, + "learning_rate": 2.5512988976466327e-06, + "loss": 0.9265, + "step": 5282 + }, + { + "epoch": 1.1, + "learning_rate": 2.5503265966272337e-06, + "loss": 0.9434, + "step": 5283 + }, + { + "epoch": 1.1, + "learning_rate": 2.5493543439252088e-06, + "loss": 0.8885, + "step": 5284 + }, + { + "epoch": 1.1, + "learning_rate": 2.5483821396450284e-06, + "loss": 0.8281, + "step": 5285 + }, + { + "epoch": 1.1, + "learning_rate": 2.5474099838911543e-06, + "loss": 0.824, + "step": 5286 + }, + { + "epoch": 1.1, + "learning_rate": 2.546437876768046e-06, + "loss": 0.8565, + "step": 5287 + }, + { + "epoch": 1.1, + "learning_rate": 2.5454658183801542e-06, + "loss": 0.7567, + "step": 5288 + }, + { + "epoch": 1.1, + "learning_rate": 2.544493808831929e-06, + "loss": 0.9076, + "step": 5289 + }, + { + "epoch": 1.1, + "learning_rate": 2.5435218482278107e-06, + "loss": 0.9721, + "step": 5290 + }, + { + "epoch": 1.1, + "learning_rate": 2.542549936672237e-06, + "loss": 1.014, + "step": 5291 + }, + { + "epoch": 1.1, + "learning_rate": 2.541578074269641e-06, + "loss": 1.1479, + "step": 5292 + }, + { + "epoch": 1.1, + "learning_rate": 2.540606261124448e-06, + "loss": 0.7412, + "step": 5293 + }, + { + "epoch": 1.1, + "learning_rate": 2.539634497341079e-06, + "loss": 1.0028, + "step": 5294 + }, + { + "epoch": 1.1, + "learning_rate": 2.53866278302395e-06, + "loss": 0.8662, + "step": 5295 + }, + { + "epoch": 1.1, + "learning_rate": 2.5376911182774728e-06, + "loss": 0.8552, + "step": 5296 + }, + { + "epoch": 1.1, + "learning_rate": 2.536719503206052e-06, + "loss": 1.0158, + "step": 5297 + }, + { + "epoch": 1.1, + "learning_rate": 2.5357479379140873e-06, + "loss": 0.8522, + "step": 5298 + }, + { + "epoch": 1.1, + "learning_rate": 2.534776422505973e-06, + "loss": 0.7945, + "step": 5299 + }, + { + "epoch": 1.1, + "learning_rate": 2.5338049570861e-06, + "loss": 0.9268, + "step": 5300 + }, + { + "epoch": 1.1, + "learning_rate": 2.5328335417588506e-06, + "loss": 0.9811, + "step": 5301 + }, + { + "epoch": 1.1, + "learning_rate": 2.5318621766286033e-06, + "loss": 0.8622, + "step": 5302 + }, + { + "epoch": 1.1, + "learning_rate": 2.530890861799732e-06, + "loss": 0.9728, + "step": 5303 + }, + { + "epoch": 1.1, + "learning_rate": 2.5299195973766048e-06, + "loss": 0.9703, + "step": 5304 + }, + { + "epoch": 1.1, + "learning_rate": 2.5289483834635827e-06, + "loss": 0.993, + "step": 5305 + }, + { + "epoch": 1.1, + "learning_rate": 2.527977220165023e-06, + "loss": 0.8691, + "step": 5306 + }, + { + "epoch": 1.1, + "learning_rate": 2.5270061075852772e-06, + "loss": 0.9669, + "step": 5307 + }, + { + "epoch": 1.1, + "learning_rate": 2.5260350458286926e-06, + "loss": 1.0025, + "step": 5308 + }, + { + "epoch": 1.1, + "learning_rate": 2.525064034999608e-06, + "loss": 0.8415, + "step": 5309 + }, + { + "epoch": 1.1, + "learning_rate": 2.524093075202358e-06, + "loss": 0.7974, + "step": 5310 + }, + { + "epoch": 1.1, + "learning_rate": 2.5231221665412733e-06, + "loss": 1.1382, + "step": 5311 + }, + { + "epoch": 1.1, + "learning_rate": 2.5221513091206785e-06, + "loss": 0.8827, + "step": 5312 + }, + { + "epoch": 1.11, + "learning_rate": 2.5211805030448917e-06, + "loss": 0.9453, + "step": 5313 + }, + { + "epoch": 1.11, + "learning_rate": 2.5202097484182244e-06, + "loss": 0.8106, + "step": 5314 + }, + { + "epoch": 1.11, + "learning_rate": 2.5192390453449864e-06, + "loss": 0.7253, + "step": 5315 + }, + { + "epoch": 1.11, + "learning_rate": 2.5182683939294786e-06, + "loss": 0.7936, + "step": 5316 + }, + { + "epoch": 1.11, + "learning_rate": 2.5172977942759975e-06, + "loss": 1.0965, + "step": 5317 + }, + { + "epoch": 1.11, + "learning_rate": 2.516327246488833e-06, + "loss": 0.876, + "step": 5318 + }, + { + "epoch": 1.11, + "learning_rate": 2.515356750672272e-06, + "loss": 0.7897, + "step": 5319 + }, + { + "epoch": 1.11, + "learning_rate": 2.5143863069305937e-06, + "loss": 1.0369, + "step": 5320 + }, + { + "epoch": 1.11, + "learning_rate": 2.513415915368073e-06, + "loss": 1.0007, + "step": 5321 + }, + { + "epoch": 1.11, + "learning_rate": 2.5124455760889766e-06, + "loss": 1.0471, + "step": 5322 + }, + { + "epoch": 1.11, + "learning_rate": 2.511475289197567e-06, + "loss": 0.8659, + "step": 5323 + }, + { + "epoch": 1.11, + "learning_rate": 2.510505054798104e-06, + "loss": 1.0309, + "step": 5324 + }, + { + "epoch": 1.11, + "learning_rate": 2.5095348729948387e-06, + "loss": 0.8687, + "step": 5325 + }, + { + "epoch": 1.11, + "learning_rate": 2.5085647438920156e-06, + "loss": 0.9464, + "step": 5326 + }, + { + "epoch": 1.11, + "learning_rate": 2.5075946675938745e-06, + "loss": 0.8846, + "step": 5327 + }, + { + "epoch": 1.11, + "learning_rate": 2.5066246442046524e-06, + "loss": 0.952, + "step": 5328 + }, + { + "epoch": 1.11, + "learning_rate": 2.505654673828577e-06, + "loss": 0.973, + "step": 5329 + }, + { + "epoch": 1.11, + "learning_rate": 2.504684756569872e-06, + "loss": 1.1483, + "step": 5330 + }, + { + "epoch": 1.11, + "learning_rate": 2.503714892532753e-06, + "loss": 0.792, + "step": 5331 + }, + { + "epoch": 1.11, + "learning_rate": 2.502745081821435e-06, + "loss": 0.9955, + "step": 5332 + }, + { + "epoch": 1.11, + "learning_rate": 2.501775324540122e-06, + "loss": 0.8566, + "step": 5333 + }, + { + "epoch": 1.11, + "learning_rate": 2.5008056207930154e-06, + "loss": 0.9623, + "step": 5334 + }, + { + "epoch": 1.11, + "learning_rate": 2.4998359706843078e-06, + "loss": 0.9417, + "step": 5335 + }, + { + "epoch": 1.11, + "learning_rate": 2.49886637431819e-06, + "loss": 1.1414, + "step": 5336 + }, + { + "epoch": 1.11, + "learning_rate": 2.497896831798845e-06, + "loss": 1.1319, + "step": 5337 + }, + { + "epoch": 1.11, + "learning_rate": 2.49692734323045e-06, + "loss": 1.1232, + "step": 5338 + }, + { + "epoch": 1.11, + "learning_rate": 2.4959579087171744e-06, + "loss": 0.7627, + "step": 5339 + }, + { + "epoch": 1.11, + "learning_rate": 2.494988528363187e-06, + "loss": 0.7209, + "step": 5340 + }, + { + "epoch": 1.11, + "learning_rate": 2.494019202272646e-06, + "loss": 1.159, + "step": 5341 + }, + { + "epoch": 1.11, + "learning_rate": 2.493049930549706e-06, + "loss": 0.8077, + "step": 5342 + }, + { + "epoch": 1.11, + "learning_rate": 2.492080713298513e-06, + "loss": 0.7464, + "step": 5343 + }, + { + "epoch": 1.11, + "learning_rate": 2.491111550623213e-06, + "loss": 0.9151, + "step": 5344 + }, + { + "epoch": 1.11, + "learning_rate": 2.49014244262794e-06, + "loss": 0.7375, + "step": 5345 + }, + { + "epoch": 1.11, + "learning_rate": 2.4891733894168255e-06, + "loss": 0.9447, + "step": 5346 + }, + { + "epoch": 1.11, + "learning_rate": 2.4882043910939924e-06, + "loss": 0.7676, + "step": 5347 + }, + { + "epoch": 1.11, + "learning_rate": 2.4872354477635617e-06, + "loss": 1.0621, + "step": 5348 + }, + { + "epoch": 1.11, + "learning_rate": 2.486266559529646e-06, + "loss": 0.8594, + "step": 5349 + }, + { + "epoch": 1.11, + "learning_rate": 2.485297726496352e-06, + "loss": 0.9788, + "step": 5350 + }, + { + "epoch": 1.11, + "learning_rate": 2.484328948767779e-06, + "loss": 0.8993, + "step": 5351 + }, + { + "epoch": 1.11, + "learning_rate": 2.4833602264480255e-06, + "loss": 0.8525, + "step": 5352 + }, + { + "epoch": 1.11, + "learning_rate": 2.4823915596411787e-06, + "loss": 1.002, + "step": 5353 + }, + { + "epoch": 1.11, + "learning_rate": 2.4814229484513215e-06, + "loss": 0.9537, + "step": 5354 + }, + { + "epoch": 1.11, + "learning_rate": 2.4804543929825302e-06, + "loss": 0.8692, + "step": 5355 + }, + { + "epoch": 1.11, + "learning_rate": 2.4794858933388785e-06, + "loss": 0.8332, + "step": 5356 + }, + { + "epoch": 1.11, + "learning_rate": 2.4785174496244307e-06, + "loss": 0.8221, + "step": 5357 + }, + { + "epoch": 1.11, + "learning_rate": 2.4775490619432444e-06, + "loss": 1.0476, + "step": 5358 + }, + { + "epoch": 1.11, + "learning_rate": 2.476580730399375e-06, + "loss": 0.7571, + "step": 5359 + }, + { + "epoch": 1.11, + "learning_rate": 2.4756124550968693e-06, + "loss": 0.8967, + "step": 5360 + }, + { + "epoch": 1.12, + "learning_rate": 2.474644236139768e-06, + "loss": 0.9004, + "step": 5361 + }, + { + "epoch": 1.12, + "learning_rate": 2.4736760736321043e-06, + "loss": 0.8777, + "step": 5362 + }, + { + "epoch": 1.12, + "learning_rate": 2.4727079676779104e-06, + "loss": 0.8797, + "step": 5363 + }, + { + "epoch": 1.12, + "learning_rate": 2.4717399183812073e-06, + "loss": 0.8421, + "step": 5364 + }, + { + "epoch": 1.12, + "learning_rate": 2.470771925846013e-06, + "loss": 0.6207, + "step": 5365 + }, + { + "epoch": 1.12, + "learning_rate": 2.4698039901763357e-06, + "loss": 0.7403, + "step": 5366 + }, + { + "epoch": 1.12, + "learning_rate": 2.4688361114761835e-06, + "loss": 1.0208, + "step": 5367 + }, + { + "epoch": 1.12, + "learning_rate": 2.467868289849553e-06, + "loss": 0.713, + "step": 5368 + }, + { + "epoch": 1.12, + "learning_rate": 2.466900525400437e-06, + "loss": 0.9579, + "step": 5369 + }, + { + "epoch": 1.12, + "learning_rate": 2.46593281823282e-06, + "loss": 0.7959, + "step": 5370 + }, + { + "epoch": 1.12, + "learning_rate": 2.464965168450685e-06, + "loss": 0.9666, + "step": 5371 + }, + { + "epoch": 1.12, + "learning_rate": 2.463997576158004e-06, + "loss": 0.7785, + "step": 5372 + }, + { + "epoch": 1.12, + "learning_rate": 2.463030041458746e-06, + "loss": 0.9794, + "step": 5373 + }, + { + "epoch": 1.12, + "learning_rate": 2.4620625644568695e-06, + "loss": 0.9188, + "step": 5374 + }, + { + "epoch": 1.12, + "learning_rate": 2.461095145256334e-06, + "loss": 0.8623, + "step": 5375 + }, + { + "epoch": 1.12, + "learning_rate": 2.460127783961087e-06, + "loss": 0.974, + "step": 5376 + }, + { + "epoch": 1.12, + "learning_rate": 2.4591604806750703e-06, + "loss": 0.9384, + "step": 5377 + }, + { + "epoch": 1.12, + "learning_rate": 2.4581932355022204e-06, + "loss": 0.8196, + "step": 5378 + }, + { + "epoch": 1.12, + "learning_rate": 2.4572260485464697e-06, + "loss": 1.0069, + "step": 5379 + }, + { + "epoch": 1.12, + "learning_rate": 2.4562589199117414e-06, + "loss": 0.8614, + "step": 5380 + }, + { + "epoch": 1.12, + "learning_rate": 2.455291849701954e-06, + "loss": 0.9308, + "step": 5381 + }, + { + "epoch": 1.12, + "learning_rate": 2.4543248380210166e-06, + "loss": 0.6554, + "step": 5382 + }, + { + "epoch": 1.12, + "learning_rate": 2.453357884972838e-06, + "loss": 0.8718, + "step": 5383 + }, + { + "epoch": 1.12, + "learning_rate": 2.452390990661315e-06, + "loss": 0.8975, + "step": 5384 + }, + { + "epoch": 1.12, + "learning_rate": 2.451424155190342e-06, + "loss": 0.7643, + "step": 5385 + }, + { + "epoch": 1.12, + "learning_rate": 2.450457378663802e-06, + "loss": 0.8519, + "step": 5386 + }, + { + "epoch": 1.12, + "learning_rate": 2.4494906611855797e-06, + "loss": 0.9264, + "step": 5387 + }, + { + "epoch": 1.12, + "learning_rate": 2.448524002859546e-06, + "loss": 0.9676, + "step": 5388 + }, + { + "epoch": 1.12, + "learning_rate": 2.447557403789569e-06, + "loss": 0.6134, + "step": 5389 + }, + { + "epoch": 1.12, + "learning_rate": 2.4465908640795088e-06, + "loss": 0.8566, + "step": 5390 + }, + { + "epoch": 1.12, + "learning_rate": 2.445624383833221e-06, + "loss": 0.8363, + "step": 5391 + }, + { + "epoch": 1.12, + "learning_rate": 2.4446579631545544e-06, + "loss": 0.7613, + "step": 5392 + }, + { + "epoch": 1.12, + "learning_rate": 2.4436916021473497e-06, + "loss": 0.7831, + "step": 5393 + }, + { + "epoch": 1.12, + "learning_rate": 2.4427253009154417e-06, + "loss": 0.9016, + "step": 5394 + }, + { + "epoch": 1.12, + "learning_rate": 2.4417590595626614e-06, + "loss": 0.8641, + "step": 5395 + }, + { + "epoch": 1.12, + "learning_rate": 2.4407928781928302e-06, + "loss": 0.9774, + "step": 5396 + }, + { + "epoch": 1.12, + "learning_rate": 2.439826756909764e-06, + "loss": 0.8459, + "step": 5397 + }, + { + "epoch": 1.12, + "learning_rate": 2.438860695817272e-06, + "loss": 0.998, + "step": 5398 + }, + { + "epoch": 1.12, + "learning_rate": 2.43789469501916e-06, + "loss": 0.8946, + "step": 5399 + }, + { + "epoch": 1.12, + "learning_rate": 2.436928754619222e-06, + "loss": 0.8568, + "step": 5400 + }, + { + "epoch": 1.12, + "learning_rate": 2.4359628747212492e-06, + "loss": 0.9063, + "step": 5401 + }, + { + "epoch": 1.12, + "learning_rate": 2.4349970554290247e-06, + "loss": 0.9024, + "step": 5402 + }, + { + "epoch": 1.12, + "learning_rate": 2.434031296846327e-06, + "loss": 0.8009, + "step": 5403 + }, + { + "epoch": 1.12, + "learning_rate": 2.433065599076926e-06, + "loss": 0.939, + "step": 5404 + }, + { + "epoch": 1.12, + "learning_rate": 2.4320999622245858e-06, + "loss": 0.9569, + "step": 5405 + }, + { + "epoch": 1.12, + "learning_rate": 2.4311343863930634e-06, + "loss": 1.0814, + "step": 5406 + }, + { + "epoch": 1.12, + "learning_rate": 2.430168871686112e-06, + "loss": 1.222, + "step": 5407 + }, + { + "epoch": 1.12, + "learning_rate": 2.4292034182074742e-06, + "loss": 0.7838, + "step": 5408 + }, + { + "epoch": 1.12, + "learning_rate": 2.428238026060888e-06, + "loss": 0.8051, + "step": 5409 + }, + { + "epoch": 1.13, + "learning_rate": 2.4272726953500844e-06, + "loss": 1.1764, + "step": 5410 + }, + { + "epoch": 1.13, + "learning_rate": 2.42630742617879e-06, + "loss": 0.8174, + "step": 5411 + }, + { + "epoch": 1.13, + "learning_rate": 2.4253422186507216e-06, + "loss": 0.7176, + "step": 5412 + }, + { + "epoch": 1.13, + "learning_rate": 2.424377072869591e-06, + "loss": 0.981, + "step": 5413 + }, + { + "epoch": 1.13, + "learning_rate": 2.4234119889391014e-06, + "loss": 0.927, + "step": 5414 + }, + { + "epoch": 1.13, + "learning_rate": 2.422446966962954e-06, + "loss": 1.0797, + "step": 5415 + }, + { + "epoch": 1.13, + "learning_rate": 2.421482007044838e-06, + "loss": 0.7752, + "step": 5416 + }, + { + "epoch": 1.13, + "learning_rate": 2.4205171092884398e-06, + "loss": 0.7886, + "step": 5417 + }, + { + "epoch": 1.13, + "learning_rate": 2.419552273797436e-06, + "loss": 0.9621, + "step": 5418 + }, + { + "epoch": 1.13, + "learning_rate": 2.4185875006754994e-06, + "loss": 0.7536, + "step": 5419 + }, + { + "epoch": 1.13, + "learning_rate": 2.4176227900262943e-06, + "loss": 0.9179, + "step": 5420 + }, + { + "epoch": 1.13, + "learning_rate": 2.4166581419534793e-06, + "loss": 1.0436, + "step": 5421 + }, + { + "epoch": 1.13, + "learning_rate": 2.415693556560704e-06, + "loss": 0.7963, + "step": 5422 + }, + { + "epoch": 1.13, + "learning_rate": 2.4147290339516154e-06, + "loss": 1.0848, + "step": 5423 + }, + { + "epoch": 1.13, + "learning_rate": 2.4137645742298505e-06, + "loss": 0.8711, + "step": 5424 + }, + { + "epoch": 1.13, + "learning_rate": 2.412800177499039e-06, + "loss": 0.7632, + "step": 5425 + }, + { + "epoch": 1.13, + "learning_rate": 2.4118358438628077e-06, + "loss": 0.8809, + "step": 5426 + }, + { + "epoch": 1.13, + "learning_rate": 2.4108715734247732e-06, + "loss": 0.7748, + "step": 5427 + }, + { + "epoch": 1.13, + "learning_rate": 2.409907366288546e-06, + "loss": 0.9742, + "step": 5428 + }, + { + "epoch": 1.13, + "learning_rate": 2.4089432225577286e-06, + "loss": 0.8633, + "step": 5429 + }, + { + "epoch": 1.13, + "learning_rate": 2.4079791423359214e-06, + "loss": 1.0183, + "step": 5430 + }, + { + "epoch": 1.13, + "learning_rate": 2.4070151257267133e-06, + "loss": 0.8488, + "step": 5431 + }, + { + "epoch": 1.13, + "learning_rate": 2.4060511728336876e-06, + "loss": 0.6782, + "step": 5432 + }, + { + "epoch": 1.13, + "learning_rate": 2.405087283760419e-06, + "loss": 0.9489, + "step": 5433 + }, + { + "epoch": 1.13, + "learning_rate": 2.4041234586104814e-06, + "loss": 0.9842, + "step": 5434 + }, + { + "epoch": 1.13, + "learning_rate": 2.403159697487435e-06, + "loss": 0.7827, + "step": 5435 + }, + { + "epoch": 1.13, + "learning_rate": 2.402196000494837e-06, + "loss": 0.7819, + "step": 5436 + }, + { + "epoch": 1.13, + "learning_rate": 2.4012323677362344e-06, + "loss": 0.9578, + "step": 5437 + }, + { + "epoch": 1.13, + "learning_rate": 2.4002687993151726e-06, + "loss": 0.8058, + "step": 5438 + }, + { + "epoch": 1.13, + "learning_rate": 2.3993052953351856e-06, + "loss": 0.8173, + "step": 5439 + }, + { + "epoch": 1.13, + "learning_rate": 2.3983418558998017e-06, + "loss": 0.9474, + "step": 5440 + }, + { + "epoch": 1.13, + "learning_rate": 2.3973784811125413e-06, + "loss": 0.7881, + "step": 5441 + }, + { + "epoch": 1.13, + "learning_rate": 2.3964151710769213e-06, + "loss": 0.8114, + "step": 5442 + }, + { + "epoch": 1.13, + "learning_rate": 2.395451925896448e-06, + "loss": 0.9245, + "step": 5443 + }, + { + "epoch": 1.13, + "learning_rate": 2.3944887456746226e-06, + "loss": 0.9082, + "step": 5444 + }, + { + "epoch": 1.13, + "learning_rate": 2.3935256305149364e-06, + "loss": 1.0817, + "step": 5445 + }, + { + "epoch": 1.13, + "learning_rate": 2.3925625805208795e-06, + "loss": 0.6958, + "step": 5446 + }, + { + "epoch": 1.13, + "learning_rate": 2.3915995957959298e-06, + "loss": 1.0326, + "step": 5447 + }, + { + "epoch": 1.13, + "learning_rate": 2.39063667644356e-06, + "loss": 0.7838, + "step": 5448 + }, + { + "epoch": 1.13, + "learning_rate": 2.389673822567235e-06, + "loss": 0.8171, + "step": 5449 + }, + { + "epoch": 1.13, + "learning_rate": 2.388711034270415e-06, + "loss": 0.7969, + "step": 5450 + }, + { + "epoch": 1.13, + "learning_rate": 2.3877483116565514e-06, + "loss": 0.7174, + "step": 5451 + }, + { + "epoch": 1.13, + "learning_rate": 2.3867856548290873e-06, + "loss": 0.9831, + "step": 5452 + }, + { + "epoch": 1.13, + "learning_rate": 2.38582306389146e-06, + "loss": 1.0048, + "step": 5453 + }, + { + "epoch": 1.13, + "learning_rate": 2.384860538947102e-06, + "loss": 0.7778, + "step": 5454 + }, + { + "epoch": 1.13, + "learning_rate": 2.383898080099435e-06, + "loss": 0.8828, + "step": 5455 + }, + { + "epoch": 1.13, + "learning_rate": 2.382935687451875e-06, + "loss": 1.096, + "step": 5456 + }, + { + "epoch": 1.13, + "learning_rate": 2.3819733611078303e-06, + "loss": 0.6968, + "step": 5457 + }, + { + "epoch": 1.14, + "learning_rate": 2.381011101170705e-06, + "loss": 0.9428, + "step": 5458 + }, + { + "epoch": 1.14, + "learning_rate": 2.3800489077438933e-06, + "loss": 0.9098, + "step": 5459 + }, + { + "epoch": 1.14, + "learning_rate": 2.3790867809307818e-06, + "loss": 0.8492, + "step": 5460 + }, + { + "epoch": 1.14, + "learning_rate": 2.37812472083475e-06, + "loss": 0.9224, + "step": 5461 + }, + { + "epoch": 1.14, + "learning_rate": 2.377162727559174e-06, + "loss": 1.1467, + "step": 5462 + }, + { + "epoch": 1.14, + "learning_rate": 2.3762008012074185e-06, + "loss": 1.0859, + "step": 5463 + }, + { + "epoch": 1.14, + "learning_rate": 2.375238941882843e-06, + "loss": 0.8281, + "step": 5464 + }, + { + "epoch": 1.14, + "learning_rate": 2.3742771496887965e-06, + "loss": 0.88, + "step": 5465 + }, + { + "epoch": 1.14, + "learning_rate": 2.373315424728627e-06, + "loss": 0.8811, + "step": 5466 + }, + { + "epoch": 1.14, + "learning_rate": 2.3723537671056704e-06, + "loss": 0.7716, + "step": 5467 + }, + { + "epoch": 1.14, + "learning_rate": 2.371392176923257e-06, + "loss": 0.8681, + "step": 5468 + }, + { + "epoch": 1.14, + "learning_rate": 2.370430654284708e-06, + "loss": 0.987, + "step": 5469 + }, + { + "epoch": 1.14, + "learning_rate": 2.369469199293341e-06, + "loss": 0.8321, + "step": 5470 + }, + { + "epoch": 1.14, + "learning_rate": 2.368507812052464e-06, + "loss": 0.9756, + "step": 5471 + }, + { + "epoch": 1.14, + "learning_rate": 2.3675464926653777e-06, + "loss": 0.6987, + "step": 5472 + }, + { + "epoch": 1.14, + "learning_rate": 2.366585241235374e-06, + "loss": 0.769, + "step": 5473 + }, + { + "epoch": 1.14, + "learning_rate": 2.3656240578657424e-06, + "loss": 0.8341, + "step": 5474 + }, + { + "epoch": 1.14, + "learning_rate": 2.3646629426597608e-06, + "loss": 0.8595, + "step": 5475 + }, + { + "epoch": 1.14, + "learning_rate": 2.3637018957207002e-06, + "loss": 0.8775, + "step": 5476 + }, + { + "epoch": 1.14, + "learning_rate": 2.3627409171518242e-06, + "loss": 1.1025, + "step": 5477 + }, + { + "epoch": 1.14, + "learning_rate": 2.361780007056393e-06, + "loss": 0.8462, + "step": 5478 + }, + { + "epoch": 1.14, + "learning_rate": 2.360819165537654e-06, + "loss": 0.9123, + "step": 5479 + }, + { + "epoch": 1.14, + "learning_rate": 2.3598583926988504e-06, + "loss": 0.8974, + "step": 5480 + }, + { + "epoch": 1.14, + "learning_rate": 2.3588976886432153e-06, + "loss": 0.8425, + "step": 5481 + }, + { + "epoch": 1.14, + "learning_rate": 2.3579370534739793e-06, + "loss": 0.9674, + "step": 5482 + }, + { + "epoch": 1.14, + "learning_rate": 2.356976487294361e-06, + "loss": 0.9959, + "step": 5483 + }, + { + "epoch": 1.14, + "learning_rate": 2.3560159902075732e-06, + "loss": 0.8991, + "step": 5484 + }, + { + "epoch": 1.14, + "learning_rate": 2.3550555623168196e-06, + "loss": 0.7641, + "step": 5485 + }, + { + "epoch": 1.14, + "learning_rate": 2.3540952037253017e-06, + "loss": 0.6567, + "step": 5486 + }, + { + "epoch": 1.14, + "learning_rate": 2.353134914536208e-06, + "loss": 0.8492, + "step": 5487 + }, + { + "epoch": 1.14, + "learning_rate": 2.352174694852721e-06, + "loss": 0.9502, + "step": 5488 + }, + { + "epoch": 1.14, + "learning_rate": 2.351214544778016e-06, + "loss": 0.9703, + "step": 5489 + }, + { + "epoch": 1.14, + "learning_rate": 2.350254464415263e-06, + "loss": 0.8644, + "step": 5490 + }, + { + "epoch": 1.14, + "learning_rate": 2.3492944538676216e-06, + "loss": 0.862, + "step": 5491 + }, + { + "epoch": 1.14, + "learning_rate": 2.3483345132382434e-06, + "loss": 0.9087, + "step": 5492 + }, + { + "epoch": 1.14, + "learning_rate": 2.3473746426302767e-06, + "loss": 0.9434, + "step": 5493 + }, + { + "epoch": 1.14, + "learning_rate": 2.3464148421468577e-06, + "loss": 0.9579, + "step": 5494 + }, + { + "epoch": 1.14, + "learning_rate": 2.3454551118911178e-06, + "loss": 0.8264, + "step": 5495 + }, + { + "epoch": 1.14, + "learning_rate": 2.3444954519661773e-06, + "loss": 0.8286, + "step": 5496 + }, + { + "epoch": 1.14, + "learning_rate": 2.343535862475156e-06, + "loss": 0.8817, + "step": 5497 + }, + { + "epoch": 1.14, + "learning_rate": 2.3425763435211587e-06, + "loss": 0.9308, + "step": 5498 + }, + { + "epoch": 1.14, + "learning_rate": 2.3416168952072865e-06, + "loss": 0.7876, + "step": 5499 + }, + { + "epoch": 1.14, + "learning_rate": 2.3406575176366306e-06, + "loss": 0.9679, + "step": 5500 + }, + { + "epoch": 1.14, + "learning_rate": 2.339698210912279e-06, + "loss": 0.8899, + "step": 5501 + }, + { + "epoch": 1.14, + "learning_rate": 2.338738975137307e-06, + "loss": 0.889, + "step": 5502 + }, + { + "epoch": 1.14, + "learning_rate": 2.3377798104147854e-06, + "loss": 0.8001, + "step": 5503 + }, + { + "epoch": 1.14, + "learning_rate": 2.3368207168477745e-06, + "loss": 0.8805, + "step": 5504 + }, + { + "epoch": 1.14, + "learning_rate": 2.3358616945393314e-06, + "loss": 1.0487, + "step": 5505 + }, + { + "epoch": 1.15, + "learning_rate": 2.3349027435925023e-06, + "loss": 0.8726, + "step": 5506 + }, + { + "epoch": 1.15, + "learning_rate": 2.3339438641103253e-06, + "loss": 0.9239, + "step": 5507 + }, + { + "epoch": 1.15, + "learning_rate": 2.3329850561958314e-06, + "loss": 0.8958, + "step": 5508 + }, + { + "epoch": 1.15, + "learning_rate": 2.3320263199520474e-06, + "loss": 0.8072, + "step": 5509 + }, + { + "epoch": 1.15, + "learning_rate": 2.3310676554819873e-06, + "loss": 1.0489, + "step": 5510 + }, + { + "epoch": 1.15, + "learning_rate": 2.33010906288866e-06, + "loss": 0.8533, + "step": 5511 + }, + { + "epoch": 1.15, + "learning_rate": 2.3291505422750647e-06, + "loss": 0.7483, + "step": 5512 + }, + { + "epoch": 1.15, + "learning_rate": 2.3281920937441972e-06, + "loss": 0.8683, + "step": 5513 + }, + { + "epoch": 1.15, + "learning_rate": 2.327233717399041e-06, + "loss": 0.8054, + "step": 5514 + }, + { + "epoch": 1.15, + "learning_rate": 2.326275413342574e-06, + "loss": 0.893, + "step": 5515 + }, + { + "epoch": 1.15, + "learning_rate": 2.3253171816777644e-06, + "loss": 0.8805, + "step": 5516 + }, + { + "epoch": 1.15, + "learning_rate": 2.324359022507577e-06, + "loss": 0.9211, + "step": 5517 + }, + { + "epoch": 1.15, + "learning_rate": 2.323400935934964e-06, + "loss": 0.9211, + "step": 5518 + }, + { + "epoch": 1.15, + "learning_rate": 2.3224429220628728e-06, + "loss": 0.7844, + "step": 5519 + }, + { + "epoch": 1.15, + "learning_rate": 2.3214849809942395e-06, + "loss": 1.1045, + "step": 5520 + }, + { + "epoch": 1.15, + "learning_rate": 2.320527112831998e-06, + "loss": 1.0465, + "step": 5521 + }, + { + "epoch": 1.15, + "learning_rate": 2.31956931767907e-06, + "loss": 0.8235, + "step": 5522 + }, + { + "epoch": 1.15, + "learning_rate": 2.3186115956383697e-06, + "loss": 0.8931, + "step": 5523 + }, + { + "epoch": 1.15, + "learning_rate": 2.3176539468128034e-06, + "loss": 1.0376, + "step": 5524 + }, + { + "epoch": 1.15, + "learning_rate": 2.3166963713052735e-06, + "loss": 0.9745, + "step": 5525 + }, + { + "epoch": 1.15, + "learning_rate": 2.31573886921867e-06, + "loss": 1.1493, + "step": 5526 + }, + { + "epoch": 1.15, + "learning_rate": 2.314781440655876e-06, + "loss": 0.8328, + "step": 5527 + }, + { + "epoch": 1.15, + "learning_rate": 2.313824085719767e-06, + "loss": 1.0781, + "step": 5528 + }, + { + "epoch": 1.15, + "learning_rate": 2.3128668045132103e-06, + "loss": 0.7675, + "step": 5529 + }, + { + "epoch": 1.15, + "learning_rate": 2.3119095971390674e-06, + "loss": 0.9608, + "step": 5530 + }, + { + "epoch": 1.15, + "learning_rate": 2.3109524637001892e-06, + "loss": 0.8963, + "step": 5531 + }, + { + "epoch": 1.15, + "learning_rate": 2.3099954042994187e-06, + "loss": 0.8578, + "step": 5532 + }, + { + "epoch": 1.15, + "learning_rate": 2.3090384190395932e-06, + "loss": 0.6581, + "step": 5533 + }, + { + "epoch": 1.15, + "learning_rate": 2.308081508023541e-06, + "loss": 0.9181, + "step": 5534 + }, + { + "epoch": 1.15, + "learning_rate": 2.3071246713540807e-06, + "loss": 0.925, + "step": 5535 + }, + { + "epoch": 1.15, + "learning_rate": 2.306167909134025e-06, + "loss": 0.994, + "step": 5536 + }, + { + "epoch": 1.15, + "learning_rate": 2.3052112214661776e-06, + "loss": 1.0201, + "step": 5537 + }, + { + "epoch": 1.15, + "learning_rate": 2.3042546084533356e-06, + "loss": 0.9187, + "step": 5538 + }, + { + "epoch": 1.15, + "learning_rate": 2.3032980701982866e-06, + "loss": 0.8483, + "step": 5539 + }, + { + "epoch": 1.15, + "learning_rate": 2.302341606803809e-06, + "loss": 0.8908, + "step": 5540 + }, + { + "epoch": 1.15, + "learning_rate": 2.3013852183726768e-06, + "loss": 0.8856, + "step": 5541 + }, + { + "epoch": 1.15, + "learning_rate": 2.300428905007653e-06, + "loss": 0.7518, + "step": 5542 + }, + { + "epoch": 1.15, + "learning_rate": 2.2994726668114935e-06, + "loss": 0.947, + "step": 5543 + }, + { + "epoch": 1.15, + "learning_rate": 2.2985165038869455e-06, + "loss": 0.9937, + "step": 5544 + }, + { + "epoch": 1.15, + "learning_rate": 2.2975604163367488e-06, + "loss": 0.885, + "step": 5545 + }, + { + "epoch": 1.15, + "learning_rate": 2.2966044042636365e-06, + "loss": 1.0408, + "step": 5546 + }, + { + "epoch": 1.15, + "learning_rate": 2.29564846777033e-06, + "loss": 0.8382, + "step": 5547 + }, + { + "epoch": 1.15, + "learning_rate": 2.2946926069595455e-06, + "loss": 0.6981, + "step": 5548 + }, + { + "epoch": 1.15, + "learning_rate": 2.2937368219339896e-06, + "loss": 0.9272, + "step": 5549 + }, + { + "epoch": 1.15, + "learning_rate": 2.292781112796362e-06, + "loss": 0.9389, + "step": 5550 + }, + { + "epoch": 1.15, + "learning_rate": 2.2918254796493545e-06, + "loss": 0.8521, + "step": 5551 + }, + { + "epoch": 1.15, + "learning_rate": 2.290869922595647e-06, + "loss": 0.8924, + "step": 5552 + }, + { + "epoch": 1.15, + "learning_rate": 2.2899144417379165e-06, + "loss": 0.8914, + "step": 5553 + }, + { + "epoch": 1.16, + "learning_rate": 2.2889590371788285e-06, + "loss": 0.9827, + "step": 5554 + }, + { + "epoch": 1.16, + "learning_rate": 2.2880037090210413e-06, + "loss": 0.9992, + "step": 5555 + }, + { + "epoch": 1.16, + "learning_rate": 2.2870484573672047e-06, + "loss": 0.6508, + "step": 5556 + }, + { + "epoch": 1.16, + "learning_rate": 2.2860932823199603e-06, + "loss": 0.9602, + "step": 5557 + }, + { + "epoch": 1.16, + "learning_rate": 2.285138183981942e-06, + "loss": 0.9286, + "step": 5558 + }, + { + "epoch": 1.16, + "learning_rate": 2.2841831624557747e-06, + "loss": 0.7895, + "step": 5559 + }, + { + "epoch": 1.16, + "learning_rate": 2.2832282178440757e-06, + "loss": 1.0617, + "step": 5560 + }, + { + "epoch": 1.16, + "learning_rate": 2.282273350249453e-06, + "loss": 0.962, + "step": 5561 + }, + { + "epoch": 1.16, + "learning_rate": 2.2813185597745085e-06, + "loss": 0.8417, + "step": 5562 + }, + { + "epoch": 1.16, + "learning_rate": 2.280363846521833e-06, + "loss": 0.9419, + "step": 5563 + }, + { + "epoch": 1.16, + "learning_rate": 2.279409210594011e-06, + "loss": 0.7282, + "step": 5564 + }, + { + "epoch": 1.16, + "learning_rate": 2.278454652093617e-06, + "loss": 0.8804, + "step": 5565 + }, + { + "epoch": 1.16, + "learning_rate": 2.27750017112322e-06, + "loss": 0.9513, + "step": 5566 + }, + { + "epoch": 1.16, + "learning_rate": 2.276545767785378e-06, + "loss": 0.8251, + "step": 5567 + }, + { + "epoch": 1.16, + "learning_rate": 2.275591442182642e-06, + "loss": 1.0187, + "step": 5568 + }, + { + "epoch": 1.16, + "learning_rate": 2.2746371944175528e-06, + "loss": 0.9546, + "step": 5569 + }, + { + "epoch": 1.16, + "learning_rate": 2.2736830245926463e-06, + "loss": 0.9331, + "step": 5570 + }, + { + "epoch": 1.16, + "learning_rate": 2.2727289328104464e-06, + "loss": 0.8649, + "step": 5571 + }, + { + "epoch": 1.16, + "learning_rate": 2.271774919173471e-06, + "loss": 0.8431, + "step": 5572 + }, + { + "epoch": 1.16, + "learning_rate": 2.2708209837842283e-06, + "loss": 1.0025, + "step": 5573 + }, + { + "epoch": 1.16, + "learning_rate": 2.2698671267452196e-06, + "loss": 0.9501, + "step": 5574 + }, + { + "epoch": 1.16, + "learning_rate": 2.268913348158935e-06, + "loss": 0.9841, + "step": 5575 + }, + { + "epoch": 1.16, + "learning_rate": 2.26795964812786e-06, + "loss": 1.0272, + "step": 5576 + }, + { + "epoch": 1.16, + "learning_rate": 2.2670060267544676e-06, + "loss": 1.0812, + "step": 5577 + }, + { + "epoch": 1.16, + "learning_rate": 2.266052484141226e-06, + "loss": 1.0769, + "step": 5578 + }, + { + "epoch": 1.16, + "learning_rate": 2.2650990203905923e-06, + "loss": 1.0406, + "step": 5579 + }, + { + "epoch": 1.16, + "learning_rate": 2.2641456356050167e-06, + "loss": 1.0352, + "step": 5580 + }, + { + "epoch": 1.16, + "learning_rate": 2.2631923298869394e-06, + "loss": 0.9705, + "step": 5581 + }, + { + "epoch": 1.16, + "learning_rate": 2.2622391033387943e-06, + "loss": 0.8707, + "step": 5582 + }, + { + "epoch": 1.16, + "learning_rate": 2.2612859560630046e-06, + "loss": 1.0313, + "step": 5583 + }, + { + "epoch": 1.16, + "learning_rate": 2.2603328881619866e-06, + "loss": 1.1498, + "step": 5584 + }, + { + "epoch": 1.16, + "learning_rate": 2.2593798997381464e-06, + "loss": 1.0202, + "step": 5585 + }, + { + "epoch": 1.16, + "learning_rate": 2.2584269908938837e-06, + "loss": 0.9039, + "step": 5586 + }, + { + "epoch": 1.16, + "learning_rate": 2.257474161731587e-06, + "loss": 0.8543, + "step": 5587 + }, + { + "epoch": 1.16, + "learning_rate": 2.2565214123536397e-06, + "loss": 1.0249, + "step": 5588 + }, + { + "epoch": 1.16, + "learning_rate": 2.2555687428624138e-06, + "loss": 0.9083, + "step": 5589 + }, + { + "epoch": 1.16, + "learning_rate": 2.2546161533602724e-06, + "loss": 0.9349, + "step": 5590 + }, + { + "epoch": 1.16, + "learning_rate": 2.2536636439495723e-06, + "loss": 1.0166, + "step": 5591 + }, + { + "epoch": 1.16, + "learning_rate": 2.2527112147326605e-06, + "loss": 0.919, + "step": 5592 + }, + { + "epoch": 1.16, + "learning_rate": 2.251758865811876e-06, + "loss": 0.925, + "step": 5593 + }, + { + "epoch": 1.16, + "learning_rate": 2.250806597289547e-06, + "loss": 1.0044, + "step": 5594 + }, + { + "epoch": 1.16, + "learning_rate": 2.2498544092679957e-06, + "loss": 1.0786, + "step": 5595 + }, + { + "epoch": 1.16, + "learning_rate": 2.248902301849535e-06, + "loss": 1.0428, + "step": 5596 + }, + { + "epoch": 1.16, + "learning_rate": 2.247950275136469e-06, + "loss": 0.931, + "step": 5597 + }, + { + "epoch": 1.16, + "learning_rate": 2.2469983292310914e-06, + "loss": 0.855, + "step": 5598 + }, + { + "epoch": 1.16, + "learning_rate": 2.2460464642356888e-06, + "loss": 0.982, + "step": 5599 + }, + { + "epoch": 1.16, + "learning_rate": 2.2450946802525413e-06, + "loss": 0.9526, + "step": 5600 + }, + { + "epoch": 1.16, + "learning_rate": 2.244142977383916e-06, + "loss": 0.7631, + "step": 5601 + }, + { + "epoch": 1.17, + "learning_rate": 2.243191355732074e-06, + "loss": 1.158, + "step": 5602 + }, + { + "epoch": 1.17, + "learning_rate": 2.2422398153992664e-06, + "loss": 1.1165, + "step": 5603 + }, + { + "epoch": 1.17, + "learning_rate": 2.2412883564877374e-06, + "loss": 0.765, + "step": 5604 + }, + { + "epoch": 1.17, + "learning_rate": 2.24033697909972e-06, + "loss": 0.9883, + "step": 5605 + }, + { + "epoch": 1.17, + "learning_rate": 2.2393856833374392e-06, + "loss": 0.7167, + "step": 5606 + }, + { + "epoch": 1.17, + "learning_rate": 2.238434469303113e-06, + "loss": 0.9491, + "step": 5607 + }, + { + "epoch": 1.17, + "learning_rate": 2.2374833370989493e-06, + "loss": 1.008, + "step": 5608 + }, + { + "epoch": 1.17, + "learning_rate": 2.236532286827146e-06, + "loss": 0.7814, + "step": 5609 + }, + { + "epoch": 1.17, + "learning_rate": 2.2355813185898937e-06, + "loss": 0.8405, + "step": 5610 + }, + { + "epoch": 1.17, + "learning_rate": 2.234630432489374e-06, + "loss": 0.7971, + "step": 5611 + }, + { + "epoch": 1.17, + "learning_rate": 2.233679628627761e-06, + "loss": 0.7374, + "step": 5612 + }, + { + "epoch": 1.17, + "learning_rate": 2.2327289071072168e-06, + "loss": 0.7473, + "step": 5613 + }, + { + "epoch": 1.17, + "learning_rate": 2.231778268029896e-06, + "loss": 0.7713, + "step": 5614 + }, + { + "epoch": 1.17, + "learning_rate": 2.230827711497946e-06, + "loss": 0.9075, + "step": 5615 + }, + { + "epoch": 1.17, + "learning_rate": 2.2298772376135034e-06, + "loss": 0.7237, + "step": 5616 + }, + { + "epoch": 1.17, + "learning_rate": 2.228926846478697e-06, + "loss": 1.0265, + "step": 5617 + }, + { + "epoch": 1.17, + "learning_rate": 2.2279765381956455e-06, + "loss": 0.9112, + "step": 5618 + }, + { + "epoch": 1.17, + "learning_rate": 2.22702631286646e-06, + "loss": 0.892, + "step": 5619 + }, + { + "epoch": 1.17, + "learning_rate": 2.2260761705932423e-06, + "loss": 1.0545, + "step": 5620 + }, + { + "epoch": 1.17, + "learning_rate": 2.2251261114780845e-06, + "loss": 0.8189, + "step": 5621 + }, + { + "epoch": 1.17, + "learning_rate": 2.224176135623071e-06, + "loss": 1.0189, + "step": 5622 + }, + { + "epoch": 1.17, + "learning_rate": 2.2232262431302755e-06, + "loss": 0.881, + "step": 5623 + }, + { + "epoch": 1.17, + "learning_rate": 2.2222764341017657e-06, + "loss": 0.9946, + "step": 5624 + }, + { + "epoch": 1.17, + "learning_rate": 2.2213267086395977e-06, + "loss": 0.8107, + "step": 5625 + }, + { + "epoch": 1.17, + "learning_rate": 2.220377066845818e-06, + "loss": 0.7502, + "step": 5626 + }, + { + "epoch": 1.17, + "learning_rate": 2.219427508822468e-06, + "loss": 0.8684, + "step": 5627 + }, + { + "epoch": 1.17, + "learning_rate": 2.2184780346715766e-06, + "loss": 0.8671, + "step": 5628 + }, + { + "epoch": 1.17, + "learning_rate": 2.2175286444951645e-06, + "loss": 0.7699, + "step": 5629 + }, + { + "epoch": 1.17, + "learning_rate": 2.216579338395243e-06, + "loss": 0.8299, + "step": 5630 + }, + { + "epoch": 1.17, + "learning_rate": 2.215630116473816e-06, + "loss": 0.9556, + "step": 5631 + }, + { + "epoch": 1.17, + "learning_rate": 2.2146809788328773e-06, + "loss": 0.8744, + "step": 5632 + }, + { + "epoch": 1.17, + "learning_rate": 2.2137319255744113e-06, + "loss": 0.9223, + "step": 5633 + }, + { + "epoch": 1.17, + "learning_rate": 2.2127829568003934e-06, + "loss": 0.7686, + "step": 5634 + }, + { + "epoch": 1.17, + "learning_rate": 2.2118340726127907e-06, + "loss": 0.8483, + "step": 5635 + }, + { + "epoch": 1.17, + "learning_rate": 2.2108852731135614e-06, + "loss": 0.7659, + "step": 5636 + }, + { + "epoch": 1.17, + "learning_rate": 2.2099365584046527e-06, + "loss": 1.0256, + "step": 5637 + }, + { + "epoch": 1.17, + "learning_rate": 2.2089879285880037e-06, + "loss": 0.823, + "step": 5638 + }, + { + "epoch": 1.17, + "learning_rate": 2.208039383765546e-06, + "loss": 0.8936, + "step": 5639 + }, + { + "epoch": 1.17, + "learning_rate": 2.2070909240392004e-06, + "loss": 0.8905, + "step": 5640 + }, + { + "epoch": 1.17, + "learning_rate": 2.2061425495108786e-06, + "loss": 1.0608, + "step": 5641 + }, + { + "epoch": 1.17, + "learning_rate": 2.2051942602824817e-06, + "loss": 0.9651, + "step": 5642 + }, + { + "epoch": 1.17, + "learning_rate": 2.204246056455906e-06, + "loss": 1.0775, + "step": 5643 + }, + { + "epoch": 1.17, + "learning_rate": 2.2032979381330347e-06, + "loss": 0.8742, + "step": 5644 + }, + { + "epoch": 1.17, + "learning_rate": 2.2023499054157434e-06, + "loss": 0.9758, + "step": 5645 + }, + { + "epoch": 1.17, + "learning_rate": 2.2014019584058964e-06, + "loss": 0.8902, + "step": 5646 + }, + { + "epoch": 1.17, + "learning_rate": 2.2004540972053536e-06, + "loss": 0.9287, + "step": 5647 + }, + { + "epoch": 1.17, + "learning_rate": 2.1995063219159605e-06, + "loss": 0.9106, + "step": 5648 + }, + { + "epoch": 1.17, + "learning_rate": 2.198558632639556e-06, + "loss": 0.8146, + "step": 5649 + }, + { + "epoch": 1.18, + "learning_rate": 2.197611029477968e-06, + "loss": 0.8216, + "step": 5650 + }, + { + "epoch": 1.18, + "learning_rate": 2.196663512533019e-06, + "loss": 0.828, + "step": 5651 + }, + { + "epoch": 1.18, + "learning_rate": 2.1957160819065186e-06, + "loss": 0.9108, + "step": 5652 + }, + { + "epoch": 1.18, + "learning_rate": 2.1947687377002673e-06, + "loss": 0.8463, + "step": 5653 + }, + { + "epoch": 1.18, + "learning_rate": 2.193821480016057e-06, + "loss": 1.0868, + "step": 5654 + }, + { + "epoch": 1.18, + "learning_rate": 2.192874308955672e-06, + "loss": 0.853, + "step": 5655 + }, + { + "epoch": 1.18, + "learning_rate": 2.1919272246208843e-06, + "loss": 0.7841, + "step": 5656 + }, + { + "epoch": 1.18, + "learning_rate": 2.1909802271134595e-06, + "loss": 0.8883, + "step": 5657 + }, + { + "epoch": 1.18, + "learning_rate": 2.19003331653515e-06, + "loss": 0.8858, + "step": 5658 + }, + { + "epoch": 1.18, + "learning_rate": 2.189086492987704e-06, + "loss": 0.9463, + "step": 5659 + }, + { + "epoch": 1.18, + "learning_rate": 2.1881397565728565e-06, + "loss": 1.035, + "step": 5660 + }, + { + "epoch": 1.18, + "learning_rate": 2.1871931073923343e-06, + "loss": 0.9025, + "step": 5661 + }, + { + "epoch": 1.18, + "learning_rate": 2.186246545547854e-06, + "loss": 0.8028, + "step": 5662 + }, + { + "epoch": 1.18, + "learning_rate": 2.185300071141125e-06, + "loss": 0.8379, + "step": 5663 + }, + { + "epoch": 1.18, + "learning_rate": 2.184353684273845e-06, + "loss": 0.995, + "step": 5664 + }, + { + "epoch": 1.18, + "learning_rate": 2.1834073850477038e-06, + "loss": 0.8797, + "step": 5665 + }, + { + "epoch": 1.18, + "learning_rate": 2.1824611735643797e-06, + "loss": 0.8696, + "step": 5666 + }, + { + "epoch": 1.18, + "learning_rate": 2.181515049925545e-06, + "loss": 0.8451, + "step": 5667 + }, + { + "epoch": 1.18, + "learning_rate": 2.18056901423286e-06, + "loss": 0.9118, + "step": 5668 + }, + { + "epoch": 1.18, + "learning_rate": 2.179623066587976e-06, + "loss": 0.8301, + "step": 5669 + }, + { + "epoch": 1.18, + "learning_rate": 2.1786772070925333e-06, + "loss": 1.0058, + "step": 5670 + }, + { + "epoch": 1.18, + "learning_rate": 2.177731435848168e-06, + "loss": 0.7521, + "step": 5671 + }, + { + "epoch": 1.18, + "learning_rate": 2.1767857529565006e-06, + "loss": 0.7776, + "step": 5672 + }, + { + "epoch": 1.18, + "learning_rate": 2.175840158519146e-06, + "loss": 1.064, + "step": 5673 + }, + { + "epoch": 1.18, + "learning_rate": 2.1748946526377055e-06, + "loss": 0.7572, + "step": 5674 + }, + { + "epoch": 1.18, + "learning_rate": 2.173949235413777e-06, + "loss": 1.0631, + "step": 5675 + }, + { + "epoch": 1.18, + "learning_rate": 2.1730039069489447e-06, + "loss": 0.8719, + "step": 5676 + }, + { + "epoch": 1.18, + "learning_rate": 2.1720586673447835e-06, + "loss": 0.9575, + "step": 5677 + }, + { + "epoch": 1.18, + "learning_rate": 2.171113516702858e-06, + "loss": 0.7841, + "step": 5678 + }, + { + "epoch": 1.18, + "learning_rate": 2.1701684551247277e-06, + "loss": 0.8419, + "step": 5679 + }, + { + "epoch": 1.18, + "learning_rate": 2.1692234827119373e-06, + "loss": 0.7977, + "step": 5680 + }, + { + "epoch": 1.18, + "learning_rate": 2.1682785995660243e-06, + "loss": 0.8249, + "step": 5681 + }, + { + "epoch": 1.18, + "learning_rate": 2.167333805788515e-06, + "loss": 0.9933, + "step": 5682 + }, + { + "epoch": 1.18, + "learning_rate": 2.1663891014809308e-06, + "loss": 0.9642, + "step": 5683 + }, + { + "epoch": 1.18, + "learning_rate": 2.1654444867447773e-06, + "loss": 0.8426, + "step": 5684 + }, + { + "epoch": 1.18, + "learning_rate": 2.164499961681555e-06, + "loss": 0.8807, + "step": 5685 + }, + { + "epoch": 1.18, + "learning_rate": 2.1635555263927503e-06, + "loss": 1.1491, + "step": 5686 + }, + { + "epoch": 1.18, + "learning_rate": 2.162611180979846e-06, + "loss": 0.8824, + "step": 5687 + }, + { + "epoch": 1.18, + "learning_rate": 2.1616669255443108e-06, + "loss": 0.9719, + "step": 5688 + }, + { + "epoch": 1.18, + "learning_rate": 2.1607227601876046e-06, + "loss": 0.9146, + "step": 5689 + }, + { + "epoch": 1.18, + "learning_rate": 2.1597786850111763e-06, + "loss": 0.6946, + "step": 5690 + }, + { + "epoch": 1.18, + "learning_rate": 2.15883470011647e-06, + "loss": 0.9826, + "step": 5691 + }, + { + "epoch": 1.18, + "learning_rate": 2.157890805604915e-06, + "loss": 0.9407, + "step": 5692 + }, + { + "epoch": 1.18, + "learning_rate": 2.156947001577931e-06, + "loss": 0.895, + "step": 5693 + }, + { + "epoch": 1.18, + "learning_rate": 2.1560032881369337e-06, + "loss": 0.9068, + "step": 5694 + }, + { + "epoch": 1.18, + "learning_rate": 2.1550596653833227e-06, + "loss": 0.86, + "step": 5695 + }, + { + "epoch": 1.18, + "learning_rate": 2.15411613341849e-06, + "loss": 0.9297, + "step": 5696 + }, + { + "epoch": 1.18, + "learning_rate": 2.1531726923438175e-06, + "loss": 1.04, + "step": 5697 + }, + { + "epoch": 1.19, + "learning_rate": 2.15222934226068e-06, + "loss": 0.9492, + "step": 5698 + }, + { + "epoch": 1.19, + "learning_rate": 2.151286083270439e-06, + "loss": 0.9311, + "step": 5699 + }, + { + "epoch": 1.19, + "learning_rate": 2.150342915474448e-06, + "loss": 1.0052, + "step": 5700 + }, + { + "epoch": 1.19, + "learning_rate": 2.1493998389740487e-06, + "loss": 0.9864, + "step": 5701 + }, + { + "epoch": 1.19, + "learning_rate": 2.1484568538705766e-06, + "loss": 1.0153, + "step": 5702 + }, + { + "epoch": 1.19, + "learning_rate": 2.1475139602653555e-06, + "loss": 1.1474, + "step": 5703 + }, + { + "epoch": 1.19, + "learning_rate": 2.1465711582596987e-06, + "loss": 0.7756, + "step": 5704 + }, + { + "epoch": 1.19, + "learning_rate": 2.1456284479549087e-06, + "loss": 0.9611, + "step": 5705 + }, + { + "epoch": 1.19, + "learning_rate": 2.144685829452282e-06, + "loss": 0.975, + "step": 5706 + }, + { + "epoch": 1.19, + "learning_rate": 2.1437433028531016e-06, + "loss": 0.8028, + "step": 5707 + }, + { + "epoch": 1.19, + "learning_rate": 2.142800868258643e-06, + "loss": 0.8146, + "step": 5708 + }, + { + "epoch": 1.19, + "learning_rate": 2.141858525770168e-06, + "loss": 0.79, + "step": 5709 + }, + { + "epoch": 1.19, + "learning_rate": 2.140916275488935e-06, + "loss": 0.636, + "step": 5710 + }, + { + "epoch": 1.19, + "learning_rate": 2.139974117516187e-06, + "loss": 0.7092, + "step": 5711 + }, + { + "epoch": 1.19, + "learning_rate": 2.139032051953158e-06, + "loss": 1.006, + "step": 5712 + }, + { + "epoch": 1.19, + "learning_rate": 2.1380900789010725e-06, + "loss": 0.8167, + "step": 5713 + }, + { + "epoch": 1.19, + "learning_rate": 2.137148198461148e-06, + "loss": 0.8613, + "step": 5714 + }, + { + "epoch": 1.19, + "learning_rate": 2.1362064107345878e-06, + "loss": 0.964, + "step": 5715 + }, + { + "epoch": 1.19, + "learning_rate": 2.1352647158225873e-06, + "loss": 0.9335, + "step": 5716 + }, + { + "epoch": 1.19, + "learning_rate": 2.13432311382633e-06, + "loss": 1.0682, + "step": 5717 + }, + { + "epoch": 1.19, + "learning_rate": 2.133381604846994e-06, + "loss": 0.8114, + "step": 5718 + }, + { + "epoch": 1.19, + "learning_rate": 2.132440188985742e-06, + "loss": 0.9259, + "step": 5719 + }, + { + "epoch": 1.19, + "learning_rate": 2.1314988663437304e-06, + "loss": 0.9711, + "step": 5720 + }, + { + "epoch": 1.19, + "learning_rate": 2.130557637022102e-06, + "loss": 1.1779, + "step": 5721 + }, + { + "epoch": 1.19, + "learning_rate": 2.1296165011219947e-06, + "loss": 0.8686, + "step": 5722 + }, + { + "epoch": 1.19, + "learning_rate": 2.128675458744533e-06, + "loss": 0.8824, + "step": 5723 + }, + { + "epoch": 1.19, + "learning_rate": 2.12773450999083e-06, + "loss": 0.8448, + "step": 5724 + }, + { + "epoch": 1.19, + "learning_rate": 2.126793654961991e-06, + "loss": 0.995, + "step": 5725 + }, + { + "epoch": 1.19, + "learning_rate": 2.1258528937591126e-06, + "loss": 1.0905, + "step": 5726 + }, + { + "epoch": 1.19, + "learning_rate": 2.1249122264832783e-06, + "loss": 0.7187, + "step": 5727 + }, + { + "epoch": 1.19, + "learning_rate": 2.1239716532355627e-06, + "loss": 0.9567, + "step": 5728 + }, + { + "epoch": 1.19, + "learning_rate": 2.12303117411703e-06, + "loss": 1.0126, + "step": 5729 + }, + { + "epoch": 1.19, + "learning_rate": 2.1220907892287357e-06, + "loss": 1.0955, + "step": 5730 + }, + { + "epoch": 1.19, + "learning_rate": 2.1211504986717233e-06, + "loss": 1.1024, + "step": 5731 + }, + { + "epoch": 1.19, + "learning_rate": 2.1202103025470274e-06, + "loss": 0.7869, + "step": 5732 + }, + { + "epoch": 1.19, + "learning_rate": 2.1192702009556705e-06, + "loss": 0.8325, + "step": 5733 + }, + { + "epoch": 1.19, + "learning_rate": 2.1183301939986686e-06, + "loss": 0.6544, + "step": 5734 + }, + { + "epoch": 1.19, + "learning_rate": 2.1173902817770245e-06, + "loss": 0.8533, + "step": 5735 + }, + { + "epoch": 1.19, + "learning_rate": 2.1164504643917314e-06, + "loss": 0.995, + "step": 5736 + }, + { + "epoch": 1.19, + "learning_rate": 2.115510741943772e-06, + "loss": 1.0661, + "step": 5737 + }, + { + "epoch": 1.19, + "learning_rate": 2.1145711145341217e-06, + "loss": 0.8991, + "step": 5738 + }, + { + "epoch": 1.19, + "learning_rate": 2.1136315822637415e-06, + "loss": 0.9965, + "step": 5739 + }, + { + "epoch": 1.19, + "learning_rate": 2.112692145233585e-06, + "loss": 0.8644, + "step": 5740 + }, + { + "epoch": 1.19, + "learning_rate": 2.111752803544593e-06, + "loss": 0.9062, + "step": 5741 + }, + { + "epoch": 1.19, + "learning_rate": 2.110813557297699e-06, + "loss": 1.0016, + "step": 5742 + }, + { + "epoch": 1.19, + "learning_rate": 2.1098744065938256e-06, + "loss": 0.7329, + "step": 5743 + }, + { + "epoch": 1.19, + "learning_rate": 2.1089353515338843e-06, + "loss": 0.9739, + "step": 5744 + }, + { + "epoch": 1.19, + "learning_rate": 2.107996392218774e-06, + "loss": 0.8691, + "step": 5745 + }, + { + "epoch": 1.2, + "learning_rate": 2.107057528749389e-06, + "loss": 0.8004, + "step": 5746 + }, + { + "epoch": 1.2, + "learning_rate": 2.1061187612266087e-06, + "loss": 0.959, + "step": 5747 + }, + { + "epoch": 1.2, + "learning_rate": 2.105180089751304e-06, + "loss": 0.9097, + "step": 5748 + }, + { + "epoch": 1.2, + "learning_rate": 2.104241514424333e-06, + "loss": 1.0171, + "step": 5749 + }, + { + "epoch": 1.2, + "learning_rate": 2.1033030353465495e-06, + "loss": 0.7767, + "step": 5750 + }, + { + "epoch": 1.2, + "learning_rate": 2.1023646526187903e-06, + "loss": 0.8448, + "step": 5751 + }, + { + "epoch": 1.2, + "learning_rate": 2.101426366341885e-06, + "loss": 0.8401, + "step": 5752 + }, + { + "epoch": 1.2, + "learning_rate": 2.1004881766166514e-06, + "loss": 0.951, + "step": 5753 + }, + { + "epoch": 1.2, + "learning_rate": 2.0995500835439003e-06, + "loss": 0.948, + "step": 5754 + }, + { + "epoch": 1.2, + "learning_rate": 2.0986120872244286e-06, + "loss": 0.9432, + "step": 5755 + }, + { + "epoch": 1.2, + "learning_rate": 2.0976741877590227e-06, + "loss": 1.0906, + "step": 5756 + }, + { + "epoch": 1.2, + "learning_rate": 2.096736385248462e-06, + "loss": 0.8354, + "step": 5757 + }, + { + "epoch": 1.2, + "learning_rate": 2.0957986797935118e-06, + "loss": 0.6783, + "step": 5758 + }, + { + "epoch": 1.2, + "learning_rate": 2.09486107149493e-06, + "loss": 1.0799, + "step": 5759 + }, + { + "epoch": 1.2, + "learning_rate": 2.093923560453459e-06, + "loss": 0.9067, + "step": 5760 + }, + { + "epoch": 1.2, + "learning_rate": 2.0929861467698387e-06, + "loss": 0.8264, + "step": 5761 + }, + { + "epoch": 1.2, + "learning_rate": 2.092048830544792e-06, + "loss": 0.7049, + "step": 5762 + }, + { + "epoch": 1.2, + "learning_rate": 2.091111611879034e-06, + "loss": 1.0155, + "step": 5763 + }, + { + "epoch": 1.2, + "learning_rate": 2.0901744908732667e-06, + "loss": 0.959, + "step": 5764 + }, + { + "epoch": 1.2, + "learning_rate": 2.089237467628187e-06, + "loss": 0.9197, + "step": 5765 + }, + { + "epoch": 1.2, + "learning_rate": 2.0883005422444766e-06, + "loss": 0.8472, + "step": 5766 + }, + { + "epoch": 1.2, + "learning_rate": 2.0873637148228074e-06, + "loss": 0.9117, + "step": 5767 + }, + { + "epoch": 1.2, + "learning_rate": 2.0864269854638407e-06, + "loss": 0.975, + "step": 5768 + }, + { + "epoch": 1.2, + "learning_rate": 2.085490354268231e-06, + "loss": 0.8263, + "step": 5769 + }, + { + "epoch": 1.2, + "learning_rate": 2.0845538213366173e-06, + "loss": 0.8815, + "step": 5770 + }, + { + "epoch": 1.2, + "learning_rate": 2.08361738676963e-06, + "loss": 0.64, + "step": 5771 + }, + { + "epoch": 1.2, + "learning_rate": 2.0826810506678877e-06, + "loss": 0.7922, + "step": 5772 + }, + { + "epoch": 1.2, + "learning_rate": 2.0817448131320026e-06, + "loss": 0.9503, + "step": 5773 + }, + { + "epoch": 1.2, + "learning_rate": 2.0808086742625717e-06, + "loss": 0.967, + "step": 5774 + }, + { + "epoch": 1.2, + "learning_rate": 2.079872634160183e-06, + "loss": 0.9548, + "step": 5775 + }, + { + "epoch": 1.2, + "learning_rate": 2.078936692925413e-06, + "loss": 0.8336, + "step": 5776 + }, + { + "epoch": 1.2, + "learning_rate": 2.078000850658831e-06, + "loss": 1.0339, + "step": 5777 + }, + { + "epoch": 1.2, + "learning_rate": 2.0770651074609915e-06, + "loss": 0.8448, + "step": 5778 + }, + { + "epoch": 1.2, + "learning_rate": 2.0761294634324406e-06, + "loss": 0.888, + "step": 5779 + }, + { + "epoch": 1.2, + "learning_rate": 2.0751939186737116e-06, + "loss": 0.8832, + "step": 5780 + }, + { + "epoch": 1.2, + "learning_rate": 2.0742584732853313e-06, + "loss": 0.9126, + "step": 5781 + }, + { + "epoch": 1.2, + "learning_rate": 2.0733231273678116e-06, + "loss": 0.9399, + "step": 5782 + }, + { + "epoch": 1.2, + "learning_rate": 2.072387881021656e-06, + "loss": 0.8625, + "step": 5783 + }, + { + "epoch": 1.2, + "learning_rate": 2.0714527343473553e-06, + "loss": 0.8162, + "step": 5784 + }, + { + "epoch": 1.2, + "learning_rate": 2.0705176874453934e-06, + "loss": 0.9331, + "step": 5785 + }, + { + "epoch": 1.2, + "learning_rate": 2.0695827404162397e-06, + "loss": 0.8288, + "step": 5786 + }, + { + "epoch": 1.2, + "learning_rate": 2.0686478933603536e-06, + "loss": 0.9352, + "step": 5787 + }, + { + "epoch": 1.2, + "learning_rate": 2.0677131463781843e-06, + "loss": 0.96, + "step": 5788 + }, + { + "epoch": 1.2, + "learning_rate": 2.0667784995701716e-06, + "loss": 0.764, + "step": 5789 + }, + { + "epoch": 1.2, + "learning_rate": 2.065843953036743e-06, + "loss": 1.0334, + "step": 5790 + }, + { + "epoch": 1.2, + "learning_rate": 2.064909506878314e-06, + "loss": 0.7106, + "step": 5791 + }, + { + "epoch": 1.2, + "learning_rate": 2.063975161195292e-06, + "loss": 0.6875, + "step": 5792 + }, + { + "epoch": 1.2, + "learning_rate": 2.063040916088072e-06, + "loss": 0.7823, + "step": 5793 + }, + { + "epoch": 1.21, + "learning_rate": 2.06210677165704e-06, + "loss": 1.0071, + "step": 5794 + }, + { + "epoch": 1.21, + "learning_rate": 2.061172728002568e-06, + "loss": 0.8239, + "step": 5795 + }, + { + "epoch": 1.21, + "learning_rate": 2.0602387852250174e-06, + "loss": 0.8723, + "step": 5796 + }, + { + "epoch": 1.21, + "learning_rate": 2.059304943424744e-06, + "loss": 0.9923, + "step": 5797 + }, + { + "epoch": 1.21, + "learning_rate": 2.0583712027020874e-06, + "loss": 0.8474, + "step": 5798 + }, + { + "epoch": 1.21, + "learning_rate": 2.057437563157378e-06, + "loss": 0.7737, + "step": 5799 + }, + { + "epoch": 1.21, + "learning_rate": 2.0565040248909337e-06, + "loss": 1.19, + "step": 5800 + }, + { + "epoch": 1.21, + "learning_rate": 2.055570588003066e-06, + "loss": 1.0343, + "step": 5801 + }, + { + "epoch": 1.21, + "learning_rate": 2.054637252594071e-06, + "loss": 0.8008, + "step": 5802 + }, + { + "epoch": 1.21, + "learning_rate": 2.0537040187642358e-06, + "loss": 0.8104, + "step": 5803 + }, + { + "epoch": 1.21, + "learning_rate": 2.052770886613835e-06, + "loss": 0.927, + "step": 5804 + }, + { + "epoch": 1.21, + "learning_rate": 2.0518378562431367e-06, + "loss": 0.9942, + "step": 5805 + }, + { + "epoch": 1.21, + "learning_rate": 2.050904927752392e-06, + "loss": 1.0359, + "step": 5806 + }, + { + "epoch": 1.21, + "learning_rate": 2.049972101241846e-06, + "loss": 0.9274, + "step": 5807 + }, + { + "epoch": 1.21, + "learning_rate": 2.049039376811728e-06, + "loss": 0.9198, + "step": 5808 + }, + { + "epoch": 1.21, + "learning_rate": 2.048106754562263e-06, + "loss": 0.8861, + "step": 5809 + }, + { + "epoch": 1.21, + "learning_rate": 2.047174234593659e-06, + "loss": 0.9958, + "step": 5810 + }, + { + "epoch": 1.21, + "learning_rate": 2.0462418170061155e-06, + "loss": 0.7964, + "step": 5811 + }, + { + "epoch": 1.21, + "learning_rate": 2.0453095018998194e-06, + "loss": 0.8221, + "step": 5812 + }, + { + "epoch": 1.21, + "learning_rate": 2.0443772893749505e-06, + "loss": 0.891, + "step": 5813 + }, + { + "epoch": 1.21, + "learning_rate": 2.0434451795316736e-06, + "loss": 0.9647, + "step": 5814 + }, + { + "epoch": 1.21, + "learning_rate": 2.0425131724701442e-06, + "loss": 0.941, + "step": 5815 + }, + { + "epoch": 1.21, + "learning_rate": 2.0415812682905047e-06, + "loss": 1.1199, + "step": 5816 + }, + { + "epoch": 1.21, + "learning_rate": 2.0406494670928903e-06, + "loss": 0.7921, + "step": 5817 + }, + { + "epoch": 1.21, + "learning_rate": 2.039717768977423e-06, + "loss": 0.7692, + "step": 5818 + }, + { + "epoch": 1.21, + "learning_rate": 2.0387861740442125e-06, + "loss": 0.7491, + "step": 5819 + }, + { + "epoch": 1.21, + "learning_rate": 2.0378546823933576e-06, + "loss": 0.9522, + "step": 5820 + }, + { + "epoch": 1.21, + "learning_rate": 2.03692329412495e-06, + "loss": 0.7655, + "step": 5821 + }, + { + "epoch": 1.21, + "learning_rate": 2.0359920093390654e-06, + "loss": 0.889, + "step": 5822 + }, + { + "epoch": 1.21, + "learning_rate": 2.03506082813577e-06, + "loss": 0.9084, + "step": 5823 + }, + { + "epoch": 1.21, + "learning_rate": 2.03412975061512e-06, + "loss": 0.9185, + "step": 5824 + }, + { + "epoch": 1.21, + "learning_rate": 2.0331987768771594e-06, + "loss": 0.7785, + "step": 5825 + }, + { + "epoch": 1.21, + "learning_rate": 2.032267907021922e-06, + "loss": 0.8567, + "step": 5826 + }, + { + "epoch": 1.21, + "learning_rate": 2.0313371411494283e-06, + "loss": 1.1906, + "step": 5827 + }, + { + "epoch": 1.21, + "learning_rate": 2.030406479359689e-06, + "loss": 0.787, + "step": 5828 + }, + { + "epoch": 1.21, + "learning_rate": 2.0294759217527053e-06, + "loss": 0.9943, + "step": 5829 + }, + { + "epoch": 1.21, + "learning_rate": 2.0285454684284647e-06, + "loss": 0.9325, + "step": 5830 + }, + { + "epoch": 1.21, + "learning_rate": 2.0276151194869433e-06, + "loss": 1.0559, + "step": 5831 + }, + { + "epoch": 1.21, + "learning_rate": 2.0266848750281082e-06, + "loss": 0.9285, + "step": 5832 + }, + { + "epoch": 1.21, + "learning_rate": 2.0257547351519145e-06, + "loss": 0.8734, + "step": 5833 + }, + { + "epoch": 1.21, + "learning_rate": 2.0248246999583047e-06, + "loss": 0.7703, + "step": 5834 + }, + { + "epoch": 1.21, + "learning_rate": 2.0238947695472106e-06, + "loss": 0.9302, + "step": 5835 + }, + { + "epoch": 1.21, + "learning_rate": 2.022964944018554e-06, + "loss": 0.8589, + "step": 5836 + }, + { + "epoch": 1.21, + "learning_rate": 2.0220352234722453e-06, + "loss": 0.8343, + "step": 5837 + }, + { + "epoch": 1.21, + "learning_rate": 2.021105608008182e-06, + "loss": 0.8353, + "step": 5838 + }, + { + "epoch": 1.21, + "learning_rate": 2.02017609772625e-06, + "loss": 1.0552, + "step": 5839 + }, + { + "epoch": 1.21, + "learning_rate": 2.019246692726327e-06, + "loss": 0.9043, + "step": 5840 + }, + { + "epoch": 1.21, + "learning_rate": 2.018317393108277e-06, + "loss": 0.7668, + "step": 5841 + }, + { + "epoch": 1.22, + "learning_rate": 2.0173881989719535e-06, + "loss": 0.8291, + "step": 5842 + }, + { + "epoch": 1.22, + "learning_rate": 2.0164591104171974e-06, + "loss": 0.9041, + "step": 5843 + }, + { + "epoch": 1.22, + "learning_rate": 2.0155301275438398e-06, + "loss": 1.0043, + "step": 5844 + }, + { + "epoch": 1.22, + "learning_rate": 2.0146012504517e-06, + "loss": 0.9959, + "step": 5845 + }, + { + "epoch": 1.22, + "learning_rate": 2.0136724792405854e-06, + "loss": 0.8879, + "step": 5846 + }, + { + "epoch": 1.22, + "learning_rate": 2.012743814010292e-06, + "loss": 0.7254, + "step": 5847 + }, + { + "epoch": 1.22, + "learning_rate": 2.011815254860606e-06, + "loss": 0.6722, + "step": 5848 + }, + { + "epoch": 1.22, + "learning_rate": 2.0108868018913e-06, + "loss": 0.7352, + "step": 5849 + }, + { + "epoch": 1.22, + "learning_rate": 2.009958455202137e-06, + "loss": 1.004, + "step": 5850 + }, + { + "epoch": 1.22, + "learning_rate": 2.009030214892867e-06, + "loss": 0.7231, + "step": 5851 + }, + { + "epoch": 1.22, + "learning_rate": 2.008102081063229e-06, + "loss": 0.7372, + "step": 5852 + }, + { + "epoch": 1.22, + "learning_rate": 2.0071740538129524e-06, + "loss": 0.9621, + "step": 5853 + }, + { + "epoch": 1.22, + "learning_rate": 2.0062461332417537e-06, + "loss": 0.8341, + "step": 5854 + }, + { + "epoch": 1.22, + "learning_rate": 2.0053183194493355e-06, + "loss": 1.1004, + "step": 5855 + }, + { + "epoch": 1.22, + "learning_rate": 2.0043906125353935e-06, + "loss": 0.9457, + "step": 5856 + }, + { + "epoch": 1.22, + "learning_rate": 2.0034630125996096e-06, + "loss": 0.9368, + "step": 5857 + }, + { + "epoch": 1.22, + "learning_rate": 2.0025355197416532e-06, + "loss": 0.8397, + "step": 5858 + }, + { + "epoch": 1.22, + "learning_rate": 2.0016081340611843e-06, + "loss": 0.9884, + "step": 5859 + }, + { + "epoch": 1.22, + "learning_rate": 2.0006808556578505e-06, + "loss": 0.6653, + "step": 5860 + }, + { + "epoch": 1.22, + "learning_rate": 1.999753684631287e-06, + "loss": 0.9909, + "step": 5861 + }, + { + "epoch": 1.22, + "learning_rate": 1.998826621081119e-06, + "loss": 0.9588, + "step": 5862 + }, + { + "epoch": 1.22, + "learning_rate": 1.9978996651069586e-06, + "loss": 0.9239, + "step": 5863 + }, + { + "epoch": 1.22, + "learning_rate": 1.9969728168084084e-06, + "loss": 0.7223, + "step": 5864 + }, + { + "epoch": 1.22, + "learning_rate": 1.996046076285057e-06, + "loss": 1.0695, + "step": 5865 + }, + { + "epoch": 1.22, + "learning_rate": 1.995119443636483e-06, + "loss": 1.0172, + "step": 5866 + }, + { + "epoch": 1.22, + "learning_rate": 1.9941929189622526e-06, + "loss": 0.923, + "step": 5867 + }, + { + "epoch": 1.22, + "learning_rate": 1.9932665023619222e-06, + "loss": 0.8049, + "step": 5868 + }, + { + "epoch": 1.22, + "learning_rate": 1.992340193935034e-06, + "loss": 0.9733, + "step": 5869 + }, + { + "epoch": 1.22, + "learning_rate": 1.9914139937811206e-06, + "loss": 0.7627, + "step": 5870 + }, + { + "epoch": 1.22, + "learning_rate": 1.9904879019997004e-06, + "loss": 1.1354, + "step": 5871 + }, + { + "epoch": 1.22, + "learning_rate": 1.9895619186902837e-06, + "loss": 0.8811, + "step": 5872 + }, + { + "epoch": 1.22, + "learning_rate": 1.988636043952367e-06, + "loss": 0.9765, + "step": 5873 + }, + { + "epoch": 1.22, + "learning_rate": 1.987710277885435e-06, + "loss": 0.8455, + "step": 5874 + }, + { + "epoch": 1.22, + "learning_rate": 1.986784620588961e-06, + "loss": 0.8408, + "step": 5875 + }, + { + "epoch": 1.22, + "learning_rate": 1.985859072162408e-06, + "loss": 0.9446, + "step": 5876 + }, + { + "epoch": 1.22, + "learning_rate": 1.9849336327052247e-06, + "loss": 0.9313, + "step": 5877 + }, + { + "epoch": 1.22, + "learning_rate": 1.9840083023168507e-06, + "loss": 0.9063, + "step": 5878 + }, + { + "epoch": 1.22, + "learning_rate": 1.9830830810967113e-06, + "loss": 0.9788, + "step": 5879 + }, + { + "epoch": 1.22, + "learning_rate": 1.982157969144223e-06, + "loss": 0.7999, + "step": 5880 + }, + { + "epoch": 1.22, + "learning_rate": 1.9812329665587878e-06, + "loss": 0.9618, + "step": 5881 + }, + { + "epoch": 1.22, + "learning_rate": 1.980308073439798e-06, + "loss": 0.9173, + "step": 5882 + }, + { + "epoch": 1.22, + "learning_rate": 1.9793832898866325e-06, + "loss": 0.8825, + "step": 5883 + }, + { + "epoch": 1.22, + "learning_rate": 1.97845861599866e-06, + "loss": 0.7667, + "step": 5884 + }, + { + "epoch": 1.22, + "learning_rate": 1.9775340518752363e-06, + "loss": 0.7889, + "step": 5885 + }, + { + "epoch": 1.22, + "learning_rate": 1.976609597615706e-06, + "loss": 0.9735, + "step": 5886 + }, + { + "epoch": 1.22, + "learning_rate": 1.975685253319401e-06, + "loss": 0.8752, + "step": 5887 + }, + { + "epoch": 1.22, + "learning_rate": 1.9747610190856425e-06, + "loss": 0.9743, + "step": 5888 + }, + { + "epoch": 1.22, + "learning_rate": 1.9738368950137395e-06, + "loss": 0.8717, + "step": 5889 + }, + { + "epoch": 1.23, + "learning_rate": 1.9729128812029887e-06, + "loss": 1.1434, + "step": 5890 + }, + { + "epoch": 1.23, + "learning_rate": 1.9719889777526757e-06, + "loss": 1.2185, + "step": 5891 + }, + { + "epoch": 1.23, + "learning_rate": 1.9710651847620743e-06, + "loss": 0.8891, + "step": 5892 + }, + { + "epoch": 1.23, + "learning_rate": 1.970141502330444e-06, + "loss": 0.9726, + "step": 5893 + }, + { + "epoch": 1.23, + "learning_rate": 1.9692179305570366e-06, + "loss": 0.7808, + "step": 5894 + }, + { + "epoch": 1.23, + "learning_rate": 1.968294469541089e-06, + "loss": 0.8929, + "step": 5895 + }, + { + "epoch": 1.23, + "learning_rate": 1.967371119381827e-06, + "loss": 1.0235, + "step": 5896 + }, + { + "epoch": 1.23, + "learning_rate": 1.9664478801784647e-06, + "loss": 0.9623, + "step": 5897 + }, + { + "epoch": 1.23, + "learning_rate": 1.965524752030203e-06, + "loss": 0.6864, + "step": 5898 + }, + { + "epoch": 1.23, + "learning_rate": 1.9646017350362342e-06, + "loss": 0.8736, + "step": 5899 + }, + { + "epoch": 1.23, + "learning_rate": 1.9636788292957345e-06, + "loss": 0.835, + "step": 5900 + }, + { + "epoch": 1.23, + "learning_rate": 1.9627560349078703e-06, + "loss": 0.8484, + "step": 5901 + }, + { + "epoch": 1.23, + "learning_rate": 1.961833351971796e-06, + "loss": 0.8363, + "step": 5902 + }, + { + "epoch": 1.23, + "learning_rate": 1.960910780586655e-06, + "loss": 0.7197, + "step": 5903 + }, + { + "epoch": 1.23, + "learning_rate": 1.9599883208515763e-06, + "loss": 0.8618, + "step": 5904 + }, + { + "epoch": 1.23, + "learning_rate": 1.9590659728656772e-06, + "loss": 0.7077, + "step": 5905 + }, + { + "epoch": 1.23, + "learning_rate": 1.9581437367280657e-06, + "loss": 0.9388, + "step": 5906 + }, + { + "epoch": 1.23, + "learning_rate": 1.9572216125378356e-06, + "loss": 0.8907, + "step": 5907 + }, + { + "epoch": 1.23, + "learning_rate": 1.9562996003940686e-06, + "loss": 0.8687, + "step": 5908 + }, + { + "epoch": 1.23, + "learning_rate": 1.955377700395835e-06, + "loss": 0.7465, + "step": 5909 + }, + { + "epoch": 1.23, + "learning_rate": 1.9544559126421927e-06, + "loss": 0.8004, + "step": 5910 + }, + { + "epoch": 1.23, + "learning_rate": 1.9535342372321887e-06, + "loss": 0.8085, + "step": 5911 + }, + { + "epoch": 1.23, + "learning_rate": 1.952612674264856e-06, + "loss": 0.9726, + "step": 5912 + }, + { + "epoch": 1.23, + "learning_rate": 1.951691223839216e-06, + "loss": 0.9722, + "step": 5913 + }, + { + "epoch": 1.23, + "learning_rate": 1.95076988605428e-06, + "loss": 0.9029, + "step": 5914 + }, + { + "epoch": 1.23, + "learning_rate": 1.949848661009045e-06, + "loss": 1.0242, + "step": 5915 + }, + { + "epoch": 1.23, + "learning_rate": 1.9489275488024956e-06, + "loss": 0.9046, + "step": 5916 + }, + { + "epoch": 1.23, + "learning_rate": 1.9480065495336064e-06, + "loss": 0.7905, + "step": 5917 + }, + { + "epoch": 1.23, + "learning_rate": 1.947085663301338e-06, + "loss": 0.9168, + "step": 5918 + }, + { + "epoch": 1.23, + "learning_rate": 1.9461648902046406e-06, + "loss": 1.0392, + "step": 5919 + }, + { + "epoch": 1.23, + "learning_rate": 1.945244230342451e-06, + "loss": 1.1302, + "step": 5920 + }, + { + "epoch": 1.23, + "learning_rate": 1.944323683813693e-06, + "loss": 0.6724, + "step": 5921 + }, + { + "epoch": 1.23, + "learning_rate": 1.9434032507172794e-06, + "loss": 0.8838, + "step": 5922 + }, + { + "epoch": 1.23, + "learning_rate": 1.9424829311521115e-06, + "loss": 0.9155, + "step": 5923 + }, + { + "epoch": 1.23, + "learning_rate": 1.941562725217078e-06, + "loss": 1.0448, + "step": 5924 + }, + { + "epoch": 1.23, + "learning_rate": 1.9406426330110534e-06, + "loss": 0.9446, + "step": 5925 + }, + { + "epoch": 1.23, + "learning_rate": 1.9397226546329014e-06, + "loss": 0.9246, + "step": 5926 + }, + { + "epoch": 1.23, + "learning_rate": 1.9388027901814757e-06, + "loss": 0.937, + "step": 5927 + }, + { + "epoch": 1.23, + "learning_rate": 1.937883039755614e-06, + "loss": 0.8551, + "step": 5928 + }, + { + "epoch": 1.23, + "learning_rate": 1.9369634034541445e-06, + "loss": 0.8775, + "step": 5929 + }, + { + "epoch": 1.23, + "learning_rate": 1.9360438813758798e-06, + "loss": 0.9527, + "step": 5930 + }, + { + "epoch": 1.23, + "learning_rate": 1.935124473619626e-06, + "loss": 0.753, + "step": 5931 + }, + { + "epoch": 1.23, + "learning_rate": 1.934205180284171e-06, + "loss": 0.765, + "step": 5932 + }, + { + "epoch": 1.23, + "learning_rate": 1.9332860014682933e-06, + "loss": 1.1864, + "step": 5933 + }, + { + "epoch": 1.23, + "learning_rate": 1.932366937270757e-06, + "loss": 0.9378, + "step": 5934 + }, + { + "epoch": 1.23, + "learning_rate": 1.9314479877903194e-06, + "loss": 0.7995, + "step": 5935 + }, + { + "epoch": 1.23, + "learning_rate": 1.9305291531257185e-06, + "loss": 0.8935, + "step": 5936 + }, + { + "epoch": 1.23, + "learning_rate": 1.9296104333756843e-06, + "loss": 0.8831, + "step": 5937 + }, + { + "epoch": 1.24, + "learning_rate": 1.928691828638931e-06, + "loss": 0.8184, + "step": 5938 + }, + { + "epoch": 1.24, + "learning_rate": 1.9277733390141653e-06, + "loss": 1.0694, + "step": 5939 + }, + { + "epoch": 1.24, + "learning_rate": 1.926854964600078e-06, + "loss": 0.8065, + "step": 5940 + }, + { + "epoch": 1.24, + "learning_rate": 1.9259367054953484e-06, + "loss": 0.9645, + "step": 5941 + }, + { + "epoch": 1.24, + "learning_rate": 1.925018561798642e-06, + "loss": 1.0232, + "step": 5942 + }, + { + "epoch": 1.24, + "learning_rate": 1.9241005336086153e-06, + "loss": 0.9543, + "step": 5943 + }, + { + "epoch": 1.24, + "learning_rate": 1.9231826210239096e-06, + "loss": 0.8099, + "step": 5944 + }, + { + "epoch": 1.24, + "learning_rate": 1.922264824143155e-06, + "loss": 1.0304, + "step": 5945 + }, + { + "epoch": 1.24, + "learning_rate": 1.921347143064966e-06, + "loss": 1.2458, + "step": 5946 + }, + { + "epoch": 1.24, + "learning_rate": 1.9204295778879513e-06, + "loss": 0.9721, + "step": 5947 + }, + { + "epoch": 1.24, + "learning_rate": 1.919512128710701e-06, + "loss": 0.7167, + "step": 5948 + }, + { + "epoch": 1.24, + "learning_rate": 1.9185947956317955e-06, + "loss": 1.1369, + "step": 5949 + }, + { + "epoch": 1.24, + "learning_rate": 1.9176775787498012e-06, + "loss": 0.9562, + "step": 5950 + }, + { + "epoch": 1.24, + "learning_rate": 1.916760478163275e-06, + "loss": 0.8168, + "step": 5951 + }, + { + "epoch": 1.24, + "learning_rate": 1.915843493970758e-06, + "loss": 0.8894, + "step": 5952 + }, + { + "epoch": 1.24, + "learning_rate": 1.91492662627078e-06, + "loss": 0.9239, + "step": 5953 + }, + { + "epoch": 1.24, + "learning_rate": 1.914009875161857e-06, + "loss": 0.828, + "step": 5954 + }, + { + "epoch": 1.24, + "learning_rate": 1.9130932407424976e-06, + "loss": 1.1202, + "step": 5955 + }, + { + "epoch": 1.24, + "learning_rate": 1.9121767231111913e-06, + "loss": 0.7607, + "step": 5956 + }, + { + "epoch": 1.24, + "learning_rate": 1.9112603223664172e-06, + "loss": 0.7591, + "step": 5957 + }, + { + "epoch": 1.24, + "learning_rate": 1.9103440386066454e-06, + "loss": 1.1535, + "step": 5958 + }, + { + "epoch": 1.24, + "learning_rate": 1.9094278719303288e-06, + "loss": 0.9375, + "step": 5959 + }, + { + "epoch": 1.24, + "learning_rate": 1.9085118224359093e-06, + "loss": 0.7997, + "step": 5960 + }, + { + "epoch": 1.24, + "learning_rate": 1.9075958902218154e-06, + "loss": 0.9328, + "step": 5961 + }, + { + "epoch": 1.24, + "learning_rate": 1.9066800753864665e-06, + "loss": 0.8812, + "step": 5962 + }, + { + "epoch": 1.24, + "learning_rate": 1.9057643780282657e-06, + "loss": 0.7584, + "step": 5963 + }, + { + "epoch": 1.24, + "learning_rate": 1.9048487982456037e-06, + "loss": 0.9116, + "step": 5964 + }, + { + "epoch": 1.24, + "learning_rate": 1.9039333361368595e-06, + "loss": 0.754, + "step": 5965 + }, + { + "epoch": 1.24, + "learning_rate": 1.9030179918004012e-06, + "loss": 0.843, + "step": 5966 + }, + { + "epoch": 1.24, + "learning_rate": 1.9021027653345818e-06, + "loss": 0.8722, + "step": 5967 + }, + { + "epoch": 1.24, + "learning_rate": 1.9011876568377416e-06, + "loss": 1.0289, + "step": 5968 + }, + { + "epoch": 1.24, + "learning_rate": 1.900272666408208e-06, + "loss": 1.0252, + "step": 5969 + }, + { + "epoch": 1.24, + "learning_rate": 1.899357794144299e-06, + "loss": 0.9011, + "step": 5970 + }, + { + "epoch": 1.24, + "learning_rate": 1.8984430401443166e-06, + "loss": 0.9614, + "step": 5971 + }, + { + "epoch": 1.24, + "learning_rate": 1.897528404506551e-06, + "loss": 0.8813, + "step": 5972 + }, + { + "epoch": 1.24, + "learning_rate": 1.8966138873292782e-06, + "loss": 1.0581, + "step": 5973 + }, + { + "epoch": 1.24, + "learning_rate": 1.895699488710766e-06, + "loss": 0.8795, + "step": 5974 + }, + { + "epoch": 1.24, + "learning_rate": 1.894785208749265e-06, + "loss": 0.8181, + "step": 5975 + }, + { + "epoch": 1.24, + "learning_rate": 1.893871047543014e-06, + "loss": 0.8204, + "step": 5976 + }, + { + "epoch": 1.24, + "learning_rate": 1.8929570051902389e-06, + "loss": 0.8824, + "step": 5977 + }, + { + "epoch": 1.24, + "learning_rate": 1.8920430817891564e-06, + "loss": 0.9394, + "step": 5978 + }, + { + "epoch": 1.24, + "learning_rate": 1.891129277437965e-06, + "loss": 0.8035, + "step": 5979 + }, + { + "epoch": 1.24, + "learning_rate": 1.890215592234854e-06, + "loss": 0.7297, + "step": 5980 + }, + { + "epoch": 1.24, + "learning_rate": 1.8893020262779973e-06, + "loss": 0.8863, + "step": 5981 + }, + { + "epoch": 1.24, + "learning_rate": 1.8883885796655595e-06, + "loss": 0.7729, + "step": 5982 + }, + { + "epoch": 1.24, + "learning_rate": 1.8874752524956896e-06, + "loss": 0.9723, + "step": 5983 + }, + { + "epoch": 1.24, + "learning_rate": 1.8865620448665247e-06, + "loss": 0.936, + "step": 5984 + }, + { + "epoch": 1.24, + "learning_rate": 1.8856489568761869e-06, + "loss": 0.9153, + "step": 5985 + }, + { + "epoch": 1.25, + "learning_rate": 1.8847359886227907e-06, + "loss": 0.6754, + "step": 5986 + }, + { + "epoch": 1.25, + "learning_rate": 1.883823140204433e-06, + "loss": 0.9466, + "step": 5987 + }, + { + "epoch": 1.25, + "learning_rate": 1.882910411719199e-06, + "loss": 1.031, + "step": 5988 + }, + { + "epoch": 1.25, + "learning_rate": 1.8819978032651606e-06, + "loss": 1.0642, + "step": 5989 + }, + { + "epoch": 1.25, + "learning_rate": 1.8810853149403793e-06, + "loss": 0.758, + "step": 5990 + }, + { + "epoch": 1.25, + "learning_rate": 1.8801729468429017e-06, + "loss": 1.1194, + "step": 5991 + }, + { + "epoch": 1.25, + "learning_rate": 1.8792606990707605e-06, + "loss": 0.9489, + "step": 5992 + }, + { + "epoch": 1.25, + "learning_rate": 1.8783485717219761e-06, + "loss": 1.109, + "step": 5993 + }, + { + "epoch": 1.25, + "learning_rate": 1.8774365648945586e-06, + "loss": 0.972, + "step": 5994 + }, + { + "epoch": 1.25, + "learning_rate": 1.8765246786865025e-06, + "loss": 0.9471, + "step": 5995 + }, + { + "epoch": 1.25, + "learning_rate": 1.8756129131957894e-06, + "loss": 1.0747, + "step": 5996 + }, + { + "epoch": 1.25, + "learning_rate": 1.8747012685203876e-06, + "loss": 1.0877, + "step": 5997 + }, + { + "epoch": 1.25, + "learning_rate": 1.873789744758255e-06, + "loss": 0.8866, + "step": 5998 + }, + { + "epoch": 1.25, + "learning_rate": 1.8728783420073343e-06, + "loss": 0.766, + "step": 5999 + }, + { + "epoch": 1.25, + "learning_rate": 1.871967060365555e-06, + "loss": 0.9616, + "step": 6000 + }, + { + "epoch": 1.25, + "eval_loss": NaN, + "eval_runtime": 15.0476, + "eval_samples_per_second": 351.882, + "eval_steps_per_second": 43.994, + "step": 6000 + }, + { + "epoch": 1.25, + "learning_rate": 1.8710558999308333e-06, + "loss": 0.7334, + "step": 6001 + }, + { + "epoch": 1.25, + "learning_rate": 1.870144860801076e-06, + "loss": 1.087, + "step": 6002 + }, + { + "epoch": 1.25, + "learning_rate": 1.8692339430741728e-06, + "loss": 1.171, + "step": 6003 + }, + { + "epoch": 1.25, + "learning_rate": 1.8683231468480018e-06, + "loss": 0.8325, + "step": 6004 + }, + { + "epoch": 1.25, + "learning_rate": 1.8674124722204266e-06, + "loss": 0.8344, + "step": 6005 + }, + { + "epoch": 1.25, + "learning_rate": 1.866501919289302e-06, + "loss": 1.1737, + "step": 6006 + }, + { + "epoch": 1.25, + "learning_rate": 1.8655914881524643e-06, + "loss": 0.813, + "step": 6007 + }, + { + "epoch": 1.25, + "learning_rate": 1.864681178907741e-06, + "loss": 0.794, + "step": 6008 + }, + { + "epoch": 1.25, + "learning_rate": 1.863770991652943e-06, + "loss": 1.0518, + "step": 6009 + }, + { + "epoch": 1.25, + "learning_rate": 1.8628609264858716e-06, + "loss": 1.0793, + "step": 6010 + }, + { + "epoch": 1.25, + "learning_rate": 1.8619509835043125e-06, + "loss": 1.0489, + "step": 6011 + }, + { + "epoch": 1.25, + "learning_rate": 1.8610411628060394e-06, + "loss": 0.7703, + "step": 6012 + }, + { + "epoch": 1.25, + "learning_rate": 1.8601314644888108e-06, + "loss": 0.9001, + "step": 6013 + }, + { + "epoch": 1.25, + "learning_rate": 1.859221888650376e-06, + "loss": 0.9587, + "step": 6014 + }, + { + "epoch": 1.25, + "learning_rate": 1.8583124353884675e-06, + "loss": 0.8702, + "step": 6015 + }, + { + "epoch": 1.25, + "learning_rate": 1.857403104800807e-06, + "loss": 1.1964, + "step": 6016 + }, + { + "epoch": 1.25, + "learning_rate": 1.8564938969851e-06, + "loss": 0.9816, + "step": 6017 + }, + { + "epoch": 1.25, + "learning_rate": 1.8555848120390434e-06, + "loss": 0.6898, + "step": 6018 + }, + { + "epoch": 1.25, + "learning_rate": 1.8546758500603169e-06, + "loss": 0.9403, + "step": 6019 + }, + { + "epoch": 1.25, + "learning_rate": 1.8537670111465888e-06, + "loss": 0.8586, + "step": 6020 + }, + { + "epoch": 1.25, + "learning_rate": 1.852858295395512e-06, + "loss": 0.859, + "step": 6021 + }, + { + "epoch": 1.25, + "learning_rate": 1.8519497029047308e-06, + "loss": 0.9582, + "step": 6022 + }, + { + "epoch": 1.25, + "learning_rate": 1.8510412337718722e-06, + "loss": 0.9122, + "step": 6023 + }, + { + "epoch": 1.25, + "learning_rate": 1.8501328880945498e-06, + "loss": 0.7779, + "step": 6024 + }, + { + "epoch": 1.25, + "learning_rate": 1.8492246659703682e-06, + "loss": 0.8311, + "step": 6025 + }, + { + "epoch": 1.25, + "learning_rate": 1.8483165674969134e-06, + "loss": 0.818, + "step": 6026 + }, + { + "epoch": 1.25, + "learning_rate": 1.847408592771762e-06, + "loss": 1.0137, + "step": 6027 + }, + { + "epoch": 1.25, + "learning_rate": 1.8465007418924734e-06, + "loss": 0.8189, + "step": 6028 + }, + { + "epoch": 1.25, + "learning_rate": 1.845593014956599e-06, + "loss": 1.1313, + "step": 6029 + }, + { + "epoch": 1.25, + "learning_rate": 1.8446854120616724e-06, + "loss": 0.8797, + "step": 6030 + }, + { + "epoch": 1.25, + "learning_rate": 1.843777933305216e-06, + "loss": 0.8788, + "step": 6031 + }, + { + "epoch": 1.25, + "learning_rate": 1.8428705787847374e-06, + "loss": 1.2206, + "step": 6032 + }, + { + "epoch": 1.25, + "learning_rate": 1.8419633485977335e-06, + "loss": 0.9204, + "step": 6033 + }, + { + "epoch": 1.25, + "learning_rate": 1.8410562428416851e-06, + "loss": 1.0369, + "step": 6034 + }, + { + "epoch": 1.26, + "learning_rate": 1.8401492616140608e-06, + "loss": 0.7957, + "step": 6035 + }, + { + "epoch": 1.26, + "learning_rate": 1.8392424050123143e-06, + "loss": 0.9675, + "step": 6036 + }, + { + "epoch": 1.26, + "learning_rate": 1.8383356731338896e-06, + "loss": 0.7549, + "step": 6037 + }, + { + "epoch": 1.26, + "learning_rate": 1.837429066076214e-06, + "loss": 1.0352, + "step": 6038 + }, + { + "epoch": 1.26, + "learning_rate": 1.8365225839367026e-06, + "loss": 0.7648, + "step": 6039 + }, + { + "epoch": 1.26, + "learning_rate": 1.8356162268127553e-06, + "loss": 0.818, + "step": 6040 + }, + { + "epoch": 1.26, + "learning_rate": 1.834709994801762e-06, + "loss": 1.0697, + "step": 6041 + }, + { + "epoch": 1.26, + "learning_rate": 1.833803888001097e-06, + "loss": 0.823, + "step": 6042 + }, + { + "epoch": 1.26, + "learning_rate": 1.8328979065081212e-06, + "loss": 1.0941, + "step": 6043 + }, + { + "epoch": 1.26, + "learning_rate": 1.8319920504201806e-06, + "loss": 0.8946, + "step": 6044 + }, + { + "epoch": 1.26, + "learning_rate": 1.831086319834612e-06, + "loss": 0.8262, + "step": 6045 + }, + { + "epoch": 1.26, + "learning_rate": 1.830180714848735e-06, + "loss": 0.9893, + "step": 6046 + }, + { + "epoch": 1.26, + "learning_rate": 1.8292752355598569e-06, + "loss": 1.1304, + "step": 6047 + }, + { + "epoch": 1.26, + "learning_rate": 1.8283698820652698e-06, + "loss": 0.9088, + "step": 6048 + }, + { + "epoch": 1.26, + "learning_rate": 1.8274646544622566e-06, + "loss": 0.9102, + "step": 6049 + }, + { + "epoch": 1.26, + "learning_rate": 1.8265595528480822e-06, + "loss": 1.0948, + "step": 6050 + }, + { + "epoch": 1.26, + "learning_rate": 1.8256545773200006e-06, + "loss": 0.8512, + "step": 6051 + }, + { + "epoch": 1.26, + "learning_rate": 1.8247497279752491e-06, + "loss": 1.0994, + "step": 6052 + }, + { + "epoch": 1.26, + "learning_rate": 1.8238450049110576e-06, + "loss": 0.7229, + "step": 6053 + }, + { + "epoch": 1.26, + "learning_rate": 1.8229404082246355e-06, + "loss": 0.9562, + "step": 6054 + }, + { + "epoch": 1.26, + "learning_rate": 1.8220359380131828e-06, + "loss": 0.904, + "step": 6055 + }, + { + "epoch": 1.26, + "learning_rate": 1.8211315943738834e-06, + "loss": 1.0957, + "step": 6056 + }, + { + "epoch": 1.26, + "learning_rate": 1.8202273774039113e-06, + "loss": 0.8709, + "step": 6057 + }, + { + "epoch": 1.26, + "learning_rate": 1.8193232872004234e-06, + "loss": 1.009, + "step": 6058 + }, + { + "epoch": 1.26, + "learning_rate": 1.818419323860564e-06, + "loss": 1.1701, + "step": 6059 + }, + { + "epoch": 1.26, + "learning_rate": 1.8175154874814623e-06, + "loss": 0.8199, + "step": 6060 + }, + { + "epoch": 1.26, + "learning_rate": 1.816611778160239e-06, + "loss": 0.8028, + "step": 6061 + }, + { + "epoch": 1.26, + "learning_rate": 1.815708195993995e-06, + "loss": 0.8552, + "step": 6062 + }, + { + "epoch": 1.26, + "learning_rate": 1.8148047410798213e-06, + "loss": 0.7969, + "step": 6063 + }, + { + "epoch": 1.26, + "learning_rate": 1.813901413514792e-06, + "loss": 0.7576, + "step": 6064 + }, + { + "epoch": 1.26, + "learning_rate": 1.8129982133959729e-06, + "loss": 0.9265, + "step": 6065 + }, + { + "epoch": 1.26, + "learning_rate": 1.8120951408204106e-06, + "loss": 1.1469, + "step": 6066 + }, + { + "epoch": 1.26, + "learning_rate": 1.8111921958851408e-06, + "loss": 0.7657, + "step": 6067 + }, + { + "epoch": 1.26, + "learning_rate": 1.8102893786871835e-06, + "loss": 0.7063, + "step": 6068 + }, + { + "epoch": 1.26, + "learning_rate": 1.8093866893235489e-06, + "loss": 0.9192, + "step": 6069 + }, + { + "epoch": 1.26, + "learning_rate": 1.8084841278912295e-06, + "loss": 1.0966, + "step": 6070 + }, + { + "epoch": 1.26, + "learning_rate": 1.8075816944872055e-06, + "loss": 0.8378, + "step": 6071 + }, + { + "epoch": 1.26, + "learning_rate": 1.8066793892084422e-06, + "loss": 0.8692, + "step": 6072 + }, + { + "epoch": 1.26, + "learning_rate": 1.8057772121518945e-06, + "loss": 0.9061, + "step": 6073 + }, + { + "epoch": 1.26, + "learning_rate": 1.8048751634145e-06, + "loss": 0.8315, + "step": 6074 + }, + { + "epoch": 1.26, + "learning_rate": 1.8039732430931842e-06, + "loss": 0.7282, + "step": 6075 + }, + { + "epoch": 1.26, + "learning_rate": 1.8030714512848568e-06, + "loss": 0.8962, + "step": 6076 + }, + { + "epoch": 1.26, + "learning_rate": 1.8021697880864179e-06, + "loss": 0.8811, + "step": 6077 + }, + { + "epoch": 1.26, + "learning_rate": 1.8012682535947496e-06, + "loss": 1.0578, + "step": 6078 + }, + { + "epoch": 1.26, + "learning_rate": 1.8003668479067217e-06, + "loss": 1.1296, + "step": 6079 + }, + { + "epoch": 1.26, + "learning_rate": 1.7994655711191894e-06, + "loss": 0.867, + "step": 6080 + }, + { + "epoch": 1.26, + "learning_rate": 1.7985644233289974e-06, + "loss": 0.8472, + "step": 6081 + }, + { + "epoch": 1.26, + "learning_rate": 1.7976634046329718e-06, + "loss": 1.0592, + "step": 6082 + }, + { + "epoch": 1.27, + "learning_rate": 1.7967625151279276e-06, + "loss": 0.9746, + "step": 6083 + }, + { + "epoch": 1.27, + "learning_rate": 1.7958617549106646e-06, + "loss": 0.8077, + "step": 6084 + }, + { + "epoch": 1.27, + "learning_rate": 1.7949611240779706e-06, + "loss": 0.7412, + "step": 6085 + }, + { + "epoch": 1.27, + "learning_rate": 1.7940606227266183e-06, + "loss": 0.9674, + "step": 6086 + }, + { + "epoch": 1.27, + "learning_rate": 1.7931602509533661e-06, + "loss": 0.7623, + "step": 6087 + }, + { + "epoch": 1.27, + "learning_rate": 1.7922600088549577e-06, + "loss": 0.8917, + "step": 6088 + }, + { + "epoch": 1.27, + "learning_rate": 1.791359896528126e-06, + "loss": 1.0034, + "step": 6089 + }, + { + "epoch": 1.27, + "learning_rate": 1.790459914069587e-06, + "loss": 0.9192, + "step": 6090 + }, + { + "epoch": 1.27, + "learning_rate": 1.7895600615760432e-06, + "loss": 0.8832, + "step": 6091 + }, + { + "epoch": 1.27, + "learning_rate": 1.7886603391441853e-06, + "loss": 0.9643, + "step": 6092 + }, + { + "epoch": 1.27, + "learning_rate": 1.7877607468706878e-06, + "loss": 0.7251, + "step": 6093 + }, + { + "epoch": 1.27, + "learning_rate": 1.7868612848522114e-06, + "loss": 0.898, + "step": 6094 + }, + { + "epoch": 1.27, + "learning_rate": 1.7859619531854023e-06, + "loss": 1.0841, + "step": 6095 + }, + { + "epoch": 1.27, + "learning_rate": 1.7850627519668955e-06, + "loss": 1.232, + "step": 6096 + }, + { + "epoch": 1.27, + "learning_rate": 1.7841636812933096e-06, + "loss": 0.8786, + "step": 6097 + }, + { + "epoch": 1.27, + "learning_rate": 1.7832647412612496e-06, + "loss": 1.0893, + "step": 6098 + }, + { + "epoch": 1.27, + "learning_rate": 1.7823659319673047e-06, + "loss": 0.8073, + "step": 6099 + }, + { + "epoch": 1.27, + "learning_rate": 1.781467253508055e-06, + "loss": 0.7899, + "step": 6100 + }, + { + "epoch": 1.27, + "learning_rate": 1.7805687059800616e-06, + "loss": 0.7355, + "step": 6101 + }, + { + "epoch": 1.27, + "learning_rate": 1.7796702894798744e-06, + "loss": 0.992, + "step": 6102 + }, + { + "epoch": 1.27, + "learning_rate": 1.778772004104026e-06, + "loss": 0.7668, + "step": 6103 + }, + { + "epoch": 1.27, + "learning_rate": 1.7778738499490395e-06, + "loss": 1.0202, + "step": 6104 + }, + { + "epoch": 1.27, + "learning_rate": 1.7769758271114208e-06, + "loss": 1.2435, + "step": 6105 + }, + { + "epoch": 1.27, + "learning_rate": 1.7760779356876624e-06, + "loss": 0.8656, + "step": 6106 + }, + { + "epoch": 1.27, + "learning_rate": 1.7751801757742408e-06, + "loss": 0.9971, + "step": 6107 + }, + { + "epoch": 1.27, + "learning_rate": 1.7742825474676236e-06, + "loss": 1.0055, + "step": 6108 + }, + { + "epoch": 1.27, + "learning_rate": 1.7733850508642589e-06, + "loss": 0.7929, + "step": 6109 + }, + { + "epoch": 1.27, + "learning_rate": 1.7724876860605832e-06, + "loss": 1.1683, + "step": 6110 + }, + { + "epoch": 1.27, + "learning_rate": 1.7715904531530164e-06, + "loss": 0.9933, + "step": 6111 + }, + { + "epoch": 1.27, + "learning_rate": 1.7706933522379693e-06, + "loss": 0.9211, + "step": 6112 + }, + { + "epoch": 1.27, + "learning_rate": 1.769796383411834e-06, + "loss": 0.9322, + "step": 6113 + }, + { + "epoch": 1.27, + "learning_rate": 1.7688995467709895e-06, + "loss": 1.0455, + "step": 6114 + }, + { + "epoch": 1.27, + "learning_rate": 1.768002842411799e-06, + "loss": 0.9758, + "step": 6115 + }, + { + "epoch": 1.27, + "learning_rate": 1.767106270430617e-06, + "loss": 0.8386, + "step": 6116 + }, + { + "epoch": 1.27, + "learning_rate": 1.7662098309237782e-06, + "loss": 0.8856, + "step": 6117 + }, + { + "epoch": 1.27, + "learning_rate": 1.7653135239876045e-06, + "loss": 0.9046, + "step": 6118 + }, + { + "epoch": 1.27, + "learning_rate": 1.7644173497184042e-06, + "loss": 0.8257, + "step": 6119 + }, + { + "epoch": 1.27, + "learning_rate": 1.7635213082124722e-06, + "loss": 0.9017, + "step": 6120 + }, + { + "epoch": 1.27, + "learning_rate": 1.7626253995660871e-06, + "loss": 0.821, + "step": 6121 + }, + { + "epoch": 1.27, + "learning_rate": 1.7617296238755153e-06, + "loss": 1.0106, + "step": 6122 + }, + { + "epoch": 1.27, + "learning_rate": 1.760833981237005e-06, + "loss": 0.991, + "step": 6123 + }, + { + "epoch": 1.27, + "learning_rate": 1.7599384717467967e-06, + "loss": 0.8135, + "step": 6124 + }, + { + "epoch": 1.27, + "learning_rate": 1.7590430955011112e-06, + "loss": 0.9882, + "step": 6125 + }, + { + "epoch": 1.27, + "learning_rate": 1.758147852596156e-06, + "loss": 0.8935, + "step": 6126 + }, + { + "epoch": 1.27, + "learning_rate": 1.7572527431281253e-06, + "loss": 0.821, + "step": 6127 + }, + { + "epoch": 1.27, + "learning_rate": 1.7563577671931987e-06, + "loss": 0.7991, + "step": 6128 + }, + { + "epoch": 1.27, + "learning_rate": 1.7554629248875418e-06, + "loss": 0.6532, + "step": 6129 + }, + { + "epoch": 1.27, + "learning_rate": 1.7545682163073046e-06, + "loss": 0.9341, + "step": 6130 + }, + { + "epoch": 1.28, + "learning_rate": 1.7536736415486233e-06, + "loss": 0.9226, + "step": 6131 + }, + { + "epoch": 1.28, + "learning_rate": 1.7527792007076201e-06, + "loss": 0.8733, + "step": 6132 + }, + { + "epoch": 1.28, + "learning_rate": 1.7518848938804033e-06, + "loss": 0.9524, + "step": 6133 + }, + { + "epoch": 1.28, + "learning_rate": 1.7509907211630659e-06, + "loss": 0.9187, + "step": 6134 + }, + { + "epoch": 1.28, + "learning_rate": 1.7500966826516858e-06, + "loss": 0.8043, + "step": 6135 + }, + { + "epoch": 1.28, + "learning_rate": 1.7492027784423274e-06, + "loss": 1.039, + "step": 6136 + }, + { + "epoch": 1.28, + "learning_rate": 1.748309008631043e-06, + "loss": 0.9259, + "step": 6137 + }, + { + "epoch": 1.28, + "learning_rate": 1.747415373313865e-06, + "loss": 0.9261, + "step": 6138 + }, + { + "epoch": 1.28, + "learning_rate": 1.7465218725868156e-06, + "loss": 1.0159, + "step": 6139 + }, + { + "epoch": 1.28, + "learning_rate": 1.7456285065459014e-06, + "loss": 0.7694, + "step": 6140 + }, + { + "epoch": 1.28, + "learning_rate": 1.7447352752871164e-06, + "loss": 0.9484, + "step": 6141 + }, + { + "epoch": 1.28, + "learning_rate": 1.7438421789064348e-06, + "loss": 0.7444, + "step": 6142 + }, + { + "epoch": 1.28, + "learning_rate": 1.7429492174998216e-06, + "loss": 1.0403, + "step": 6143 + }, + { + "epoch": 1.28, + "learning_rate": 1.7420563911632253e-06, + "loss": 0.9108, + "step": 6144 + }, + { + "epoch": 1.28, + "learning_rate": 1.7411636999925815e-06, + "loss": 0.8635, + "step": 6145 + }, + { + "epoch": 1.28, + "learning_rate": 1.7402711440838063e-06, + "loss": 0.8406, + "step": 6146 + }, + { + "epoch": 1.28, + "learning_rate": 1.7393787235328077e-06, + "loss": 0.8295, + "step": 6147 + }, + { + "epoch": 1.28, + "learning_rate": 1.7384864384354748e-06, + "loss": 1.1513, + "step": 6148 + }, + { + "epoch": 1.28, + "learning_rate": 1.7375942888876854e-06, + "loss": 0.9351, + "step": 6149 + }, + { + "epoch": 1.28, + "learning_rate": 1.7367022749852982e-06, + "loss": 1.0673, + "step": 6150 + }, + { + "epoch": 1.28, + "learning_rate": 1.7358103968241616e-06, + "loss": 0.8407, + "step": 6151 + }, + { + "epoch": 1.28, + "learning_rate": 1.7349186545001075e-06, + "loss": 0.7166, + "step": 6152 + }, + { + "epoch": 1.28, + "learning_rate": 1.7340270481089548e-06, + "loss": 0.8415, + "step": 6153 + }, + { + "epoch": 1.28, + "learning_rate": 1.7331355777465045e-06, + "loss": 1.139, + "step": 6154 + }, + { + "epoch": 1.28, + "learning_rate": 1.7322442435085459e-06, + "loss": 1.1235, + "step": 6155 + }, + { + "epoch": 1.28, + "learning_rate": 1.7313530454908531e-06, + "loss": 0.9599, + "step": 6156 + }, + { + "epoch": 1.28, + "learning_rate": 1.730461983789186e-06, + "loss": 0.6763, + "step": 6157 + }, + { + "epoch": 1.28, + "learning_rate": 1.7295710584992857e-06, + "loss": 0.7838, + "step": 6158 + }, + { + "epoch": 1.28, + "learning_rate": 1.728680269716887e-06, + "loss": 1.0594, + "step": 6159 + }, + { + "epoch": 1.28, + "learning_rate": 1.7277896175377013e-06, + "loss": 0.9367, + "step": 6160 + }, + { + "epoch": 1.28, + "learning_rate": 1.7268991020574316e-06, + "loss": 0.8286, + "step": 6161 + }, + { + "epoch": 1.28, + "learning_rate": 1.72600872337176e-06, + "loss": 0.7283, + "step": 6162 + }, + { + "epoch": 1.28, + "learning_rate": 1.7251184815763632e-06, + "loss": 0.736, + "step": 6163 + }, + { + "epoch": 1.28, + "learning_rate": 1.724228376766893e-06, + "loss": 1.093, + "step": 6164 + }, + { + "epoch": 1.28, + "learning_rate": 1.7233384090389933e-06, + "loss": 0.8445, + "step": 6165 + }, + { + "epoch": 1.28, + "learning_rate": 1.722448578488289e-06, + "loss": 1.0559, + "step": 6166 + }, + { + "epoch": 1.28, + "learning_rate": 1.7215588852103958e-06, + "loss": 0.829, + "step": 6167 + }, + { + "epoch": 1.28, + "learning_rate": 1.720669329300908e-06, + "loss": 0.9469, + "step": 6168 + }, + { + "epoch": 1.28, + "learning_rate": 1.7197799108554107e-06, + "loss": 0.9053, + "step": 6169 + }, + { + "epoch": 1.28, + "learning_rate": 1.7188906299694677e-06, + "loss": 0.8337, + "step": 6170 + }, + { + "epoch": 1.28, + "learning_rate": 1.7180014867386381e-06, + "loss": 1.031, + "step": 6171 + }, + { + "epoch": 1.28, + "learning_rate": 1.7171124812584555e-06, + "loss": 0.9751, + "step": 6172 + }, + { + "epoch": 1.28, + "learning_rate": 1.7162236136244473e-06, + "loss": 0.8649, + "step": 6173 + }, + { + "epoch": 1.28, + "learning_rate": 1.7153348839321173e-06, + "loss": 0.8392, + "step": 6174 + }, + { + "epoch": 1.28, + "learning_rate": 1.7144462922769648e-06, + "loss": 0.8508, + "step": 6175 + }, + { + "epoch": 1.28, + "learning_rate": 1.713557838754465e-06, + "loss": 1.0268, + "step": 6176 + }, + { + "epoch": 1.28, + "learning_rate": 1.7126695234600848e-06, + "loss": 0.9356, + "step": 6177 + }, + { + "epoch": 1.28, + "learning_rate": 1.7117813464892707e-06, + "loss": 1.0111, + "step": 6178 + }, + { + "epoch": 1.29, + "learning_rate": 1.7108933079374607e-06, + "loss": 0.9264, + "step": 6179 + }, + { + "epoch": 1.29, + "learning_rate": 1.710005407900072e-06, + "loss": 0.8498, + "step": 6180 + }, + { + "epoch": 1.29, + "learning_rate": 1.7091176464725115e-06, + "loss": 0.9505, + "step": 6181 + }, + { + "epoch": 1.29, + "learning_rate": 1.7082300237501652e-06, + "loss": 0.9764, + "step": 6182 + }, + { + "epoch": 1.29, + "learning_rate": 1.7073425398284126e-06, + "loss": 0.7815, + "step": 6183 + }, + { + "epoch": 1.29, + "learning_rate": 1.7064551948026111e-06, + "loss": 0.6727, + "step": 6184 + }, + { + "epoch": 1.29, + "learning_rate": 1.7055679887681078e-06, + "loss": 0.8685, + "step": 6185 + }, + { + "epoch": 1.29, + "learning_rate": 1.7046809218202295e-06, + "loss": 1.008, + "step": 6186 + }, + { + "epoch": 1.29, + "learning_rate": 1.7037939940542956e-06, + "loss": 1.0032, + "step": 6187 + }, + { + "epoch": 1.29, + "learning_rate": 1.702907205565604e-06, + "loss": 0.7433, + "step": 6188 + }, + { + "epoch": 1.29, + "learning_rate": 1.7020205564494412e-06, + "loss": 1.1251, + "step": 6189 + }, + { + "epoch": 1.29, + "learning_rate": 1.701134046801075e-06, + "loss": 0.9687, + "step": 6190 + }, + { + "epoch": 1.29, + "learning_rate": 1.700247676715765e-06, + "loss": 0.8505, + "step": 6191 + }, + { + "epoch": 1.29, + "learning_rate": 1.699361446288748e-06, + "loss": 0.9139, + "step": 6192 + }, + { + "epoch": 1.29, + "learning_rate": 1.6984753556152523e-06, + "loss": 0.8837, + "step": 6193 + }, + { + "epoch": 1.29, + "learning_rate": 1.6975894047904846e-06, + "loss": 0.9306, + "step": 6194 + }, + { + "epoch": 1.29, + "learning_rate": 1.6967035939096441e-06, + "loss": 0.9663, + "step": 6195 + }, + { + "epoch": 1.29, + "learning_rate": 1.6958179230679086e-06, + "loss": 0.9366, + "step": 6196 + }, + { + "epoch": 1.29, + "learning_rate": 1.6949323923604456e-06, + "loss": 0.8347, + "step": 6197 + }, + { + "epoch": 1.29, + "learning_rate": 1.6940470018824014e-06, + "loss": 1.0598, + "step": 6198 + }, + { + "epoch": 1.29, + "learning_rate": 1.6931617517289157e-06, + "loss": 0.78, + "step": 6199 + }, + { + "epoch": 1.29, + "learning_rate": 1.692276641995106e-06, + "loss": 0.9287, + "step": 6200 + }, + { + "epoch": 1.29, + "learning_rate": 1.6913916727760786e-06, + "loss": 0.9251, + "step": 6201 + }, + { + "epoch": 1.29, + "learning_rate": 1.69050684416692e-06, + "loss": 0.8396, + "step": 6202 + }, + { + "epoch": 1.29, + "learning_rate": 1.6896221562627102e-06, + "loss": 0.7182, + "step": 6203 + }, + { + "epoch": 1.29, + "learning_rate": 1.6887376091585048e-06, + "loss": 0.7879, + "step": 6204 + }, + { + "epoch": 1.29, + "learning_rate": 1.6878532029493512e-06, + "loss": 0.8719, + "step": 6205 + }, + { + "epoch": 1.29, + "learning_rate": 1.6869689377302747e-06, + "loss": 0.8647, + "step": 6206 + }, + { + "epoch": 1.29, + "learning_rate": 1.6860848135962946e-06, + "loss": 0.9297, + "step": 6207 + }, + { + "epoch": 1.29, + "learning_rate": 1.6852008306424065e-06, + "loss": 1.0546, + "step": 6208 + }, + { + "epoch": 1.29, + "learning_rate": 1.6843169889635962e-06, + "loss": 0.8382, + "step": 6209 + }, + { + "epoch": 1.29, + "learning_rate": 1.6834332886548292e-06, + "loss": 0.94, + "step": 6210 + }, + { + "epoch": 1.29, + "learning_rate": 1.682549729811064e-06, + "loss": 0.8753, + "step": 6211 + }, + { + "epoch": 1.29, + "learning_rate": 1.6816663125272354e-06, + "loss": 0.9137, + "step": 6212 + }, + { + "epoch": 1.29, + "learning_rate": 1.6807830368982682e-06, + "loss": 0.9677, + "step": 6213 + }, + { + "epoch": 1.29, + "learning_rate": 1.6798999030190669e-06, + "loss": 0.8722, + "step": 6214 + }, + { + "epoch": 1.29, + "learning_rate": 1.6790169109845298e-06, + "loss": 0.8263, + "step": 6215 + }, + { + "epoch": 1.29, + "learning_rate": 1.6781340608895298e-06, + "loss": 1.0007, + "step": 6216 + }, + { + "epoch": 1.29, + "learning_rate": 1.6772513528289318e-06, + "loss": 0.7785, + "step": 6217 + }, + { + "epoch": 1.29, + "learning_rate": 1.6763687868975792e-06, + "loss": 1.1013, + "step": 6218 + }, + { + "epoch": 1.29, + "learning_rate": 1.6754863631903085e-06, + "loss": 1.066, + "step": 6219 + }, + { + "epoch": 1.29, + "learning_rate": 1.6746040818019326e-06, + "loss": 0.8698, + "step": 6220 + }, + { + "epoch": 1.29, + "learning_rate": 1.673721942827254e-06, + "loss": 0.8136, + "step": 6221 + }, + { + "epoch": 1.29, + "learning_rate": 1.6728399463610566e-06, + "loss": 0.7275, + "step": 6222 + }, + { + "epoch": 1.29, + "learning_rate": 1.6719580924981137e-06, + "loss": 1.0515, + "step": 6223 + }, + { + "epoch": 1.29, + "learning_rate": 1.6710763813331785e-06, + "loss": 0.8105, + "step": 6224 + }, + { + "epoch": 1.29, + "learning_rate": 1.670194812960991e-06, + "loss": 1.0229, + "step": 6225 + }, + { + "epoch": 1.29, + "learning_rate": 1.6693133874762758e-06, + "loss": 0.6953, + "step": 6226 + }, + { + "epoch": 1.3, + "learning_rate": 1.6684321049737438e-06, + "loss": 0.8671, + "step": 6227 + }, + { + "epoch": 1.3, + "learning_rate": 1.6675509655480854e-06, + "loss": 0.9071, + "step": 6228 + }, + { + "epoch": 1.3, + "learning_rate": 1.666669969293981e-06, + "loss": 0.9225, + "step": 6229 + }, + { + "epoch": 1.3, + "learning_rate": 1.665789116306093e-06, + "loss": 0.7641, + "step": 6230 + }, + { + "epoch": 1.3, + "learning_rate": 1.6649084066790707e-06, + "loss": 0.91, + "step": 6231 + }, + { + "epoch": 1.3, + "learning_rate": 1.6640278405075437e-06, + "loss": 0.8002, + "step": 6232 + }, + { + "epoch": 1.3, + "learning_rate": 1.6631474178861296e-06, + "loss": 1.0139, + "step": 6233 + }, + { + "epoch": 1.3, + "learning_rate": 1.6622671389094302e-06, + "loss": 1.0736, + "step": 6234 + }, + { + "epoch": 1.3, + "learning_rate": 1.6613870036720324e-06, + "loss": 0.9638, + "step": 6235 + }, + { + "epoch": 1.3, + "learning_rate": 1.6605070122685046e-06, + "loss": 0.7511, + "step": 6236 + }, + { + "epoch": 1.3, + "learning_rate": 1.6596271647934024e-06, + "loss": 0.9495, + "step": 6237 + }, + { + "epoch": 1.3, + "learning_rate": 1.658747461341266e-06, + "loss": 0.7576, + "step": 6238 + }, + { + "epoch": 1.3, + "learning_rate": 1.65786790200662e-06, + "loss": 0.9337, + "step": 6239 + }, + { + "epoch": 1.3, + "learning_rate": 1.6569884868839716e-06, + "loss": 0.8762, + "step": 6240 + }, + { + "epoch": 1.3, + "learning_rate": 1.6561092160678138e-06, + "loss": 0.8497, + "step": 6241 + }, + { + "epoch": 1.3, + "learning_rate": 1.6552300896526252e-06, + "loss": 0.9074, + "step": 6242 + }, + { + "epoch": 1.3, + "learning_rate": 1.6543511077328681e-06, + "loss": 0.8182, + "step": 6243 + }, + { + "epoch": 1.3, + "learning_rate": 1.653472270402988e-06, + "loss": 0.7974, + "step": 6244 + }, + { + "epoch": 1.3, + "learning_rate": 1.6525935777574162e-06, + "loss": 0.8408, + "step": 6245 + }, + { + "epoch": 1.3, + "learning_rate": 1.651715029890568e-06, + "loss": 0.7185, + "step": 6246 + }, + { + "epoch": 1.3, + "learning_rate": 1.6508366268968449e-06, + "loss": 0.8391, + "step": 6247 + }, + { + "epoch": 1.3, + "learning_rate": 1.649958368870629e-06, + "loss": 1.1535, + "step": 6248 + }, + { + "epoch": 1.3, + "learning_rate": 1.6490802559062895e-06, + "loss": 0.8201, + "step": 6249 + }, + { + "epoch": 1.3, + "learning_rate": 1.648202288098181e-06, + "loss": 1.0687, + "step": 6250 + }, + { + "epoch": 1.3, + "learning_rate": 1.6473244655406408e-06, + "loss": 1.02, + "step": 6251 + }, + { + "epoch": 1.3, + "learning_rate": 1.6464467883279893e-06, + "loss": 0.8659, + "step": 6252 + }, + { + "epoch": 1.3, + "learning_rate": 1.6455692565545332e-06, + "loss": 0.7524, + "step": 6253 + }, + { + "epoch": 1.3, + "learning_rate": 1.6446918703145646e-06, + "loss": 0.9569, + "step": 6254 + }, + { + "epoch": 1.3, + "learning_rate": 1.6438146297023587e-06, + "loss": 0.8073, + "step": 6255 + }, + { + "epoch": 1.3, + "learning_rate": 1.6429375348121726e-06, + "loss": 0.9109, + "step": 6256 + }, + { + "epoch": 1.3, + "learning_rate": 1.642060585738252e-06, + "loss": 0.8413, + "step": 6257 + }, + { + "epoch": 1.3, + "learning_rate": 1.6411837825748244e-06, + "loss": 0.8455, + "step": 6258 + }, + { + "epoch": 1.3, + "learning_rate": 1.6403071254161038e-06, + "loss": 0.9941, + "step": 6259 + }, + { + "epoch": 1.3, + "learning_rate": 1.639430614356284e-06, + "loss": 1.1455, + "step": 6260 + }, + { + "epoch": 1.3, + "learning_rate": 1.6385542494895476e-06, + "loss": 0.8294, + "step": 6261 + }, + { + "epoch": 1.3, + "learning_rate": 1.6376780309100604e-06, + "loss": 0.9929, + "step": 6262 + }, + { + "epoch": 1.3, + "learning_rate": 1.6368019587119723e-06, + "loss": 0.8904, + "step": 6263 + }, + { + "epoch": 1.3, + "learning_rate": 1.6359260329894152e-06, + "loss": 0.9965, + "step": 6264 + }, + { + "epoch": 1.3, + "learning_rate": 1.6350502538365087e-06, + "loss": 0.7961, + "step": 6265 + }, + { + "epoch": 1.3, + "learning_rate": 1.6341746213473552e-06, + "loss": 1.1837, + "step": 6266 + }, + { + "epoch": 1.3, + "learning_rate": 1.6332991356160422e-06, + "loss": 1.0118, + "step": 6267 + }, + { + "epoch": 1.3, + "learning_rate": 1.6324237967366383e-06, + "loss": 0.8274, + "step": 6268 + }, + { + "epoch": 1.3, + "learning_rate": 1.6315486048031997e-06, + "loss": 0.9059, + "step": 6269 + }, + { + "epoch": 1.3, + "learning_rate": 1.6306735599097663e-06, + "loss": 0.7645, + "step": 6270 + }, + { + "epoch": 1.3, + "learning_rate": 1.629798662150362e-06, + "loss": 1.0571, + "step": 6271 + }, + { + "epoch": 1.3, + "learning_rate": 1.6289239116189926e-06, + "loss": 1.0293, + "step": 6272 + }, + { + "epoch": 1.3, + "learning_rate": 1.6280493084096512e-06, + "loss": 0.9126, + "step": 6273 + }, + { + "epoch": 1.3, + "learning_rate": 1.627174852616314e-06, + "loss": 0.8203, + "step": 6274 + }, + { + "epoch": 1.31, + "learning_rate": 1.6263005443329416e-06, + "loss": 0.8327, + "step": 6275 + }, + { + "epoch": 1.31, + "learning_rate": 1.6254263836534764e-06, + "loss": 0.7758, + "step": 6276 + }, + { + "epoch": 1.31, + "learning_rate": 1.6245523706718484e-06, + "loss": 1.1886, + "step": 6277 + }, + { + "epoch": 1.31, + "learning_rate": 1.6236785054819703e-06, + "loss": 0.7934, + "step": 6278 + }, + { + "epoch": 1.31, + "learning_rate": 1.6228047881777394e-06, + "loss": 0.7606, + "step": 6279 + }, + { + "epoch": 1.31, + "learning_rate": 1.6219312188530346e-06, + "loss": 0.9329, + "step": 6280 + }, + { + "epoch": 1.31, + "learning_rate": 1.621057797601722e-06, + "loss": 0.9313, + "step": 6281 + }, + { + "epoch": 1.31, + "learning_rate": 1.6201845245176506e-06, + "loss": 0.9242, + "step": 6282 + }, + { + "epoch": 1.31, + "learning_rate": 1.6193113996946547e-06, + "loss": 0.839, + "step": 6283 + }, + { + "epoch": 1.31, + "learning_rate": 1.6184384232265495e-06, + "loss": 0.7903, + "step": 6284 + }, + { + "epoch": 1.31, + "learning_rate": 1.617565595207137e-06, + "loss": 0.9539, + "step": 6285 + }, + { + "epoch": 1.31, + "learning_rate": 1.6166929157302025e-06, + "loss": 0.8768, + "step": 6286 + }, + { + "epoch": 1.31, + "learning_rate": 1.615820384889517e-06, + "loss": 1.0711, + "step": 6287 + }, + { + "epoch": 1.31, + "learning_rate": 1.614948002778831e-06, + "loss": 0.8161, + "step": 6288 + }, + { + "epoch": 1.31, + "learning_rate": 1.6140757694918833e-06, + "loss": 0.7887, + "step": 6289 + }, + { + "epoch": 1.31, + "learning_rate": 1.6132036851223952e-06, + "loss": 0.8481, + "step": 6290 + }, + { + "epoch": 1.31, + "learning_rate": 1.6123317497640736e-06, + "loss": 0.7175, + "step": 6291 + }, + { + "epoch": 1.31, + "learning_rate": 1.6114599635106047e-06, + "loss": 0.7986, + "step": 6292 + }, + { + "epoch": 1.31, + "learning_rate": 1.610588326455664e-06, + "loss": 0.8596, + "step": 6293 + }, + { + "epoch": 1.31, + "learning_rate": 1.6097168386929086e-06, + "loss": 0.883, + "step": 6294 + }, + { + "epoch": 1.31, + "learning_rate": 1.6088455003159806e-06, + "loss": 1.02, + "step": 6295 + }, + { + "epoch": 1.31, + "learning_rate": 1.6079743114185035e-06, + "loss": 0.8691, + "step": 6296 + }, + { + "epoch": 1.31, + "learning_rate": 1.607103272094087e-06, + "loss": 0.7803, + "step": 6297 + }, + { + "epoch": 1.31, + "learning_rate": 1.6062323824363248e-06, + "loss": 0.7346, + "step": 6298 + }, + { + "epoch": 1.31, + "learning_rate": 1.6053616425387944e-06, + "loss": 1.0815, + "step": 6299 + }, + { + "epoch": 1.31, + "learning_rate": 1.604491052495055e-06, + "loss": 0.8306, + "step": 6300 + }, + { + "epoch": 1.31, + "learning_rate": 1.6036206123986525e-06, + "loss": 0.9098, + "step": 6301 + }, + { + "epoch": 1.31, + "learning_rate": 1.6027503223431152e-06, + "loss": 0.9758, + "step": 6302 + }, + { + "epoch": 1.31, + "learning_rate": 1.601880182421958e-06, + "loss": 0.9194, + "step": 6303 + }, + { + "epoch": 1.31, + "learning_rate": 1.6010101927286736e-06, + "loss": 0.8249, + "step": 6304 + }, + { + "epoch": 1.31, + "learning_rate": 1.6001403533567445e-06, + "loss": 1.0197, + "step": 6305 + }, + { + "epoch": 1.31, + "learning_rate": 1.5992706643996341e-06, + "loss": 0.972, + "step": 6306 + }, + { + "epoch": 1.31, + "learning_rate": 1.5984011259507925e-06, + "loss": 0.8045, + "step": 6307 + }, + { + "epoch": 1.31, + "learning_rate": 1.5975317381036486e-06, + "loss": 1.1157, + "step": 6308 + }, + { + "epoch": 1.31, + "learning_rate": 1.5966625009516195e-06, + "loss": 1.0806, + "step": 6309 + }, + { + "epoch": 1.31, + "learning_rate": 1.5957934145881044e-06, + "loss": 0.7344, + "step": 6310 + }, + { + "epoch": 1.31, + "learning_rate": 1.5949244791064879e-06, + "loss": 1.0892, + "step": 6311 + }, + { + "epoch": 1.31, + "learning_rate": 1.5940556946001344e-06, + "loss": 1.0712, + "step": 6312 + }, + { + "epoch": 1.31, + "learning_rate": 1.5931870611623964e-06, + "loss": 0.8763, + "step": 6313 + }, + { + "epoch": 1.31, + "learning_rate": 1.5923185788866083e-06, + "loss": 0.9586, + "step": 6314 + }, + { + "epoch": 1.31, + "learning_rate": 1.5914502478660899e-06, + "loss": 0.8999, + "step": 6315 + }, + { + "epoch": 1.31, + "learning_rate": 1.5905820681941402e-06, + "loss": 0.9955, + "step": 6316 + }, + { + "epoch": 1.31, + "learning_rate": 1.589714039964047e-06, + "loss": 1.2504, + "step": 6317 + }, + { + "epoch": 1.31, + "learning_rate": 1.5888461632690797e-06, + "loss": 0.9251, + "step": 6318 + }, + { + "epoch": 1.31, + "learning_rate": 1.5879784382024921e-06, + "loss": 0.8448, + "step": 6319 + }, + { + "epoch": 1.31, + "learning_rate": 1.5871108648575197e-06, + "loss": 1.0182, + "step": 6320 + }, + { + "epoch": 1.31, + "learning_rate": 1.586243443327384e-06, + "loss": 0.9388, + "step": 6321 + }, + { + "epoch": 1.31, + "learning_rate": 1.5853761737052894e-06, + "loss": 0.8286, + "step": 6322 + }, + { + "epoch": 1.32, + "learning_rate": 1.5845090560844253e-06, + "loss": 0.6769, + "step": 6323 + }, + { + "epoch": 1.32, + "learning_rate": 1.5836420905579611e-06, + "loss": 0.8509, + "step": 6324 + }, + { + "epoch": 1.32, + "learning_rate": 1.5827752772190528e-06, + "loss": 0.9496, + "step": 6325 + }, + { + "epoch": 1.32, + "learning_rate": 1.5819086161608399e-06, + "loss": 1.0023, + "step": 6326 + }, + { + "epoch": 1.32, + "learning_rate": 1.5810421074764464e-06, + "loss": 1.004, + "step": 6327 + }, + { + "epoch": 1.32, + "learning_rate": 1.580175751258976e-06, + "loss": 0.9957, + "step": 6328 + }, + { + "epoch": 1.32, + "learning_rate": 1.5793095476015197e-06, + "loss": 1.1213, + "step": 6329 + }, + { + "epoch": 1.32, + "learning_rate": 1.578443496597151e-06, + "loss": 0.7954, + "step": 6330 + }, + { + "epoch": 1.32, + "learning_rate": 1.5775775983389286e-06, + "loss": 0.8017, + "step": 6331 + }, + { + "epoch": 1.32, + "learning_rate": 1.5767118529198906e-06, + "loss": 0.7391, + "step": 6332 + }, + { + "epoch": 1.32, + "learning_rate": 1.5758462604330622e-06, + "loss": 0.7956, + "step": 6333 + }, + { + "epoch": 1.32, + "learning_rate": 1.5749808209714518e-06, + "loss": 0.952, + "step": 6334 + }, + { + "epoch": 1.32, + "learning_rate": 1.5741155346280511e-06, + "loss": 1.0083, + "step": 6335 + }, + { + "epoch": 1.32, + "learning_rate": 1.5732504014958336e-06, + "loss": 0.7832, + "step": 6336 + }, + { + "epoch": 1.32, + "learning_rate": 1.5723854216677588e-06, + "loss": 0.9005, + "step": 6337 + }, + { + "epoch": 1.32, + "learning_rate": 1.5715205952367685e-06, + "loss": 0.9964, + "step": 6338 + }, + { + "epoch": 1.32, + "learning_rate": 1.5706559222957897e-06, + "loss": 0.9652, + "step": 6339 + }, + { + "epoch": 1.32, + "learning_rate": 1.5697914029377283e-06, + "loss": 0.7807, + "step": 6340 + }, + { + "epoch": 1.32, + "learning_rate": 1.568927037255479e-06, + "loss": 0.9763, + "step": 6341 + }, + { + "epoch": 1.32, + "learning_rate": 1.5680628253419173e-06, + "loss": 0.8453, + "step": 6342 + }, + { + "epoch": 1.32, + "learning_rate": 1.5671987672899039e-06, + "loss": 0.7279, + "step": 6343 + }, + { + "epoch": 1.32, + "learning_rate": 1.5663348631922797e-06, + "loss": 0.7467, + "step": 6344 + }, + { + "epoch": 1.32, + "learning_rate": 1.5654711131418725e-06, + "loss": 1.1367, + "step": 6345 + }, + { + "epoch": 1.32, + "learning_rate": 1.5646075172314916e-06, + "loss": 0.98, + "step": 6346 + }, + { + "epoch": 1.32, + "learning_rate": 1.5637440755539317e-06, + "loss": 0.8268, + "step": 6347 + }, + { + "epoch": 1.32, + "learning_rate": 1.5628807882019666e-06, + "loss": 0.8519, + "step": 6348 + }, + { + "epoch": 1.32, + "learning_rate": 1.5620176552683591e-06, + "loss": 0.8129, + "step": 6349 + }, + { + "epoch": 1.32, + "learning_rate": 1.5611546768458516e-06, + "loss": 0.8017, + "step": 6350 + }, + { + "epoch": 1.32, + "learning_rate": 1.5602918530271723e-06, + "loss": 1.0613, + "step": 6351 + }, + { + "epoch": 1.32, + "learning_rate": 1.55942918390503e-06, + "loss": 0.8492, + "step": 6352 + }, + { + "epoch": 1.32, + "learning_rate": 1.5585666695721186e-06, + "loss": 0.9083, + "step": 6353 + }, + { + "epoch": 1.32, + "learning_rate": 1.5577043101211171e-06, + "loss": 1.0572, + "step": 6354 + }, + { + "epoch": 1.32, + "learning_rate": 1.5568421056446823e-06, + "loss": 0.9786, + "step": 6355 + }, + { + "epoch": 1.32, + "learning_rate": 1.5559800562354628e-06, + "loss": 0.8268, + "step": 6356 + }, + { + "epoch": 1.32, + "learning_rate": 1.5551181619860817e-06, + "loss": 0.8307, + "step": 6357 + }, + { + "epoch": 1.32, + "learning_rate": 1.5542564229891524e-06, + "loss": 1.0004, + "step": 6358 + }, + { + "epoch": 1.32, + "learning_rate": 1.5533948393372644e-06, + "loss": 0.8556, + "step": 6359 + }, + { + "epoch": 1.32, + "learning_rate": 1.5525334111230005e-06, + "loss": 0.8365, + "step": 6360 + }, + { + "epoch": 1.32, + "learning_rate": 1.551672138438917e-06, + "loss": 0.9705, + "step": 6361 + }, + { + "epoch": 1.32, + "learning_rate": 1.5508110213775604e-06, + "loss": 0.8012, + "step": 6362 + }, + { + "epoch": 1.32, + "learning_rate": 1.5499500600314534e-06, + "loss": 0.898, + "step": 6363 + }, + { + "epoch": 1.32, + "learning_rate": 1.549089254493111e-06, + "loss": 0.7168, + "step": 6364 + }, + { + "epoch": 1.32, + "learning_rate": 1.5482286048550241e-06, + "loss": 0.9309, + "step": 6365 + }, + { + "epoch": 1.32, + "learning_rate": 1.5473681112096712e-06, + "loss": 0.8917, + "step": 6366 + }, + { + "epoch": 1.32, + "learning_rate": 1.5465077736495083e-06, + "loss": 1.0472, + "step": 6367 + }, + { + "epoch": 1.32, + "learning_rate": 1.545647592266984e-06, + "loss": 0.763, + "step": 6368 + }, + { + "epoch": 1.32, + "learning_rate": 1.544787567154521e-06, + "loss": 1.0612, + "step": 6369 + }, + { + "epoch": 1.32, + "learning_rate": 1.5439276984045311e-06, + "loss": 0.8555, + "step": 6370 + }, + { + "epoch": 1.33, + "learning_rate": 1.5430679861094038e-06, + "loss": 0.7581, + "step": 6371 + }, + { + "epoch": 1.33, + "learning_rate": 1.54220843036152e-06, + "loss": 0.8629, + "step": 6372 + }, + { + "epoch": 1.33, + "learning_rate": 1.541349031253235e-06, + "loss": 0.6856, + "step": 6373 + }, + { + "epoch": 1.33, + "learning_rate": 1.5404897888768935e-06, + "loss": 1.0474, + "step": 6374 + }, + { + "epoch": 1.33, + "learning_rate": 1.5396307033248176e-06, + "loss": 0.8698, + "step": 6375 + }, + { + "epoch": 1.33, + "learning_rate": 1.538771774689321e-06, + "loss": 0.8298, + "step": 6376 + }, + { + "epoch": 1.33, + "learning_rate": 1.5379130030626918e-06, + "loss": 0.7206, + "step": 6377 + }, + { + "epoch": 1.33, + "learning_rate": 1.5370543885372074e-06, + "loss": 1.03, + "step": 6378 + }, + { + "epoch": 1.33, + "learning_rate": 1.5361959312051221e-06, + "loss": 0.7159, + "step": 6379 + }, + { + "epoch": 1.33, + "learning_rate": 1.5353376311586816e-06, + "loss": 1.1282, + "step": 6380 + }, + { + "epoch": 1.33, + "learning_rate": 1.5344794884901072e-06, + "loss": 0.8819, + "step": 6381 + }, + { + "epoch": 1.33, + "learning_rate": 1.5336215032916082e-06, + "loss": 0.9351, + "step": 6382 + }, + { + "epoch": 1.33, + "learning_rate": 1.5327636756553723e-06, + "loss": 0.8408, + "step": 6383 + }, + { + "epoch": 1.33, + "learning_rate": 1.5319060056735762e-06, + "loss": 0.952, + "step": 6384 + }, + { + "epoch": 1.33, + "learning_rate": 1.5310484934383741e-06, + "loss": 0.9616, + "step": 6385 + }, + { + "epoch": 1.33, + "learning_rate": 1.5301911390419081e-06, + "loss": 1.099, + "step": 6386 + }, + { + "epoch": 1.33, + "learning_rate": 1.5293339425762969e-06, + "loss": 0.9369, + "step": 6387 + }, + { + "epoch": 1.33, + "learning_rate": 1.528476904133651e-06, + "loss": 0.8772, + "step": 6388 + }, + { + "epoch": 1.33, + "learning_rate": 1.5276200238060553e-06, + "loss": 1.0025, + "step": 6389 + }, + { + "epoch": 1.33, + "learning_rate": 1.5267633016855841e-06, + "loss": 0.8877, + "step": 6390 + }, + { + "epoch": 1.33, + "learning_rate": 1.525906737864289e-06, + "loss": 0.6582, + "step": 6391 + }, + { + "epoch": 1.33, + "learning_rate": 1.5250503324342118e-06, + "loss": 0.889, + "step": 6392 + }, + { + "epoch": 1.33, + "learning_rate": 1.52419408548737e-06, + "loss": 1.0023, + "step": 6393 + }, + { + "epoch": 1.33, + "learning_rate": 1.5233379971157697e-06, + "loss": 0.7995, + "step": 6394 + }, + { + "epoch": 1.33, + "learning_rate": 1.5224820674113938e-06, + "loss": 1.1307, + "step": 6395 + }, + { + "epoch": 1.33, + "learning_rate": 1.521626296466217e-06, + "loss": 0.8896, + "step": 6396 + }, + { + "epoch": 1.33, + "learning_rate": 1.5207706843721872e-06, + "loss": 0.9437, + "step": 6397 + }, + { + "epoch": 1.33, + "learning_rate": 1.5199152312212434e-06, + "loss": 0.9501, + "step": 6398 + }, + { + "epoch": 1.33, + "learning_rate": 1.5190599371053002e-06, + "loss": 0.7327, + "step": 6399 + }, + { + "epoch": 1.33, + "learning_rate": 1.5182048021162633e-06, + "loss": 0.9795, + "step": 6400 + }, + { + "epoch": 1.33, + "learning_rate": 1.5173498263460136e-06, + "loss": 1.0659, + "step": 6401 + }, + { + "epoch": 1.33, + "learning_rate": 1.5164950098864206e-06, + "loss": 1.0153, + "step": 6402 + }, + { + "epoch": 1.33, + "learning_rate": 1.5156403528293304e-06, + "loss": 1.0047, + "step": 6403 + }, + { + "epoch": 1.33, + "learning_rate": 1.5147858552665805e-06, + "loss": 0.919, + "step": 6404 + }, + { + "epoch": 1.33, + "learning_rate": 1.5139315172899833e-06, + "loss": 0.851, + "step": 6405 + }, + { + "epoch": 1.33, + "learning_rate": 1.5130773389913398e-06, + "loss": 0.9996, + "step": 6406 + }, + { + "epoch": 1.33, + "learning_rate": 1.5122233204624279e-06, + "loss": 0.9605, + "step": 6407 + }, + { + "epoch": 1.33, + "learning_rate": 1.5113694617950164e-06, + "loss": 0.8986, + "step": 6408 + }, + { + "epoch": 1.33, + "learning_rate": 1.5105157630808488e-06, + "loss": 0.7347, + "step": 6409 + }, + { + "epoch": 1.33, + "learning_rate": 1.5096622244116568e-06, + "loss": 0.8675, + "step": 6410 + }, + { + "epoch": 1.33, + "learning_rate": 1.5088088458791508e-06, + "loss": 0.7405, + "step": 6411 + }, + { + "epoch": 1.33, + "learning_rate": 1.5079556275750302e-06, + "loss": 0.7895, + "step": 6412 + }, + { + "epoch": 1.33, + "learning_rate": 1.5071025695909696e-06, + "loss": 0.9907, + "step": 6413 + }, + { + "epoch": 1.33, + "learning_rate": 1.5062496720186322e-06, + "loss": 0.6518, + "step": 6414 + }, + { + "epoch": 1.33, + "learning_rate": 1.5053969349496592e-06, + "loss": 0.9494, + "step": 6415 + }, + { + "epoch": 1.33, + "learning_rate": 1.504544358475681e-06, + "loss": 0.7102, + "step": 6416 + }, + { + "epoch": 1.33, + "learning_rate": 1.5036919426883037e-06, + "loss": 1.0554, + "step": 6417 + }, + { + "epoch": 1.33, + "learning_rate": 1.5028396876791215e-06, + "loss": 0.9413, + "step": 6418 + }, + { + "epoch": 1.34, + "learning_rate": 1.5019875935397054e-06, + "loss": 0.8175, + "step": 6419 + }, + { + "epoch": 1.34, + "learning_rate": 1.5011356603616182e-06, + "loss": 0.963, + "step": 6420 + }, + { + "epoch": 1.34, + "learning_rate": 1.5002838882363962e-06, + "loss": 0.9991, + "step": 6421 + }, + { + "epoch": 1.34, + "learning_rate": 1.4994322772555628e-06, + "loss": 0.8045, + "step": 6422 + }, + { + "epoch": 1.34, + "learning_rate": 1.4985808275106247e-06, + "loss": 0.688, + "step": 6423 + }, + { + "epoch": 1.34, + "learning_rate": 1.4977295390930705e-06, + "loss": 1.0202, + "step": 6424 + }, + { + "epoch": 1.34, + "learning_rate": 1.4968784120943688e-06, + "loss": 0.7427, + "step": 6425 + }, + { + "epoch": 1.34, + "learning_rate": 1.4960274466059748e-06, + "loss": 0.6579, + "step": 6426 + }, + { + "epoch": 1.34, + "learning_rate": 1.4951766427193244e-06, + "loss": 0.9227, + "step": 6427 + }, + { + "epoch": 1.34, + "learning_rate": 1.4943260005258371e-06, + "loss": 0.7788, + "step": 6428 + }, + { + "epoch": 1.34, + "learning_rate": 1.4934755201169125e-06, + "loss": 0.7915, + "step": 6429 + }, + { + "epoch": 1.34, + "learning_rate": 1.492625201583936e-06, + "loss": 1.121, + "step": 6430 + }, + { + "epoch": 1.34, + "learning_rate": 1.491775045018274e-06, + "loss": 0.8462, + "step": 6431 + }, + { + "epoch": 1.34, + "learning_rate": 1.490925050511277e-06, + "loss": 0.9979, + "step": 6432 + }, + { + "epoch": 1.34, + "learning_rate": 1.4900752181542744e-06, + "loss": 0.7653, + "step": 6433 + }, + { + "epoch": 1.34, + "learning_rate": 1.4892255480385815e-06, + "loss": 0.9326, + "step": 6434 + }, + { + "epoch": 1.34, + "learning_rate": 1.4883760402554962e-06, + "loss": 1.0958, + "step": 6435 + }, + { + "epoch": 1.34, + "learning_rate": 1.4875266948962985e-06, + "loss": 0.7471, + "step": 6436 + }, + { + "epoch": 1.34, + "learning_rate": 1.4866775120522482e-06, + "loss": 0.8245, + "step": 6437 + }, + { + "epoch": 1.34, + "learning_rate": 1.4858284918145913e-06, + "loss": 0.8288, + "step": 6438 + }, + { + "epoch": 1.34, + "learning_rate": 1.484979634274555e-06, + "loss": 0.8852, + "step": 6439 + }, + { + "epoch": 1.34, + "learning_rate": 1.4841309395233501e-06, + "loss": 0.7825, + "step": 6440 + }, + { + "epoch": 1.34, + "learning_rate": 1.4832824076521661e-06, + "loss": 0.8879, + "step": 6441 + }, + { + "epoch": 1.34, + "learning_rate": 1.4824340387521794e-06, + "loss": 0.8528, + "step": 6442 + }, + { + "epoch": 1.34, + "learning_rate": 1.481585832914547e-06, + "loss": 1.297, + "step": 6443 + }, + { + "epoch": 1.34, + "learning_rate": 1.4807377902304094e-06, + "loss": 0.7444, + "step": 6444 + }, + { + "epoch": 1.34, + "learning_rate": 1.479889910790887e-06, + "loss": 1.0579, + "step": 6445 + }, + { + "epoch": 1.34, + "learning_rate": 1.479042194687085e-06, + "loss": 1.0806, + "step": 6446 + }, + { + "epoch": 1.34, + "learning_rate": 1.4781946420100906e-06, + "loss": 0.77, + "step": 6447 + }, + { + "epoch": 1.34, + "learning_rate": 1.477347252850975e-06, + "loss": 1.0815, + "step": 6448 + }, + { + "epoch": 1.34, + "learning_rate": 1.4765000273007867e-06, + "loss": 0.8441, + "step": 6449 + }, + { + "epoch": 1.34, + "learning_rate": 1.4756529654505619e-06, + "loss": 0.7747, + "step": 6450 + }, + { + "epoch": 1.34, + "learning_rate": 1.474806067391317e-06, + "loss": 0.8592, + "step": 6451 + }, + { + "epoch": 1.34, + "learning_rate": 1.4739593332140528e-06, + "loss": 1.1064, + "step": 6452 + }, + { + "epoch": 1.34, + "learning_rate": 1.473112763009748e-06, + "loss": 1.0341, + "step": 6453 + }, + { + "epoch": 1.34, + "learning_rate": 1.4722663568693682e-06, + "loss": 0.8132, + "step": 6454 + }, + { + "epoch": 1.34, + "learning_rate": 1.471420114883859e-06, + "loss": 0.829, + "step": 6455 + }, + { + "epoch": 1.34, + "learning_rate": 1.4705740371441507e-06, + "loss": 1.0679, + "step": 6456 + }, + { + "epoch": 1.34, + "learning_rate": 1.4697281237411516e-06, + "loss": 0.8693, + "step": 6457 + }, + { + "epoch": 1.34, + "learning_rate": 1.4688823747657568e-06, + "loss": 0.9122, + "step": 6458 + }, + { + "epoch": 1.34, + "learning_rate": 1.468036790308841e-06, + "loss": 1.0314, + "step": 6459 + }, + { + "epoch": 1.34, + "learning_rate": 1.4671913704612644e-06, + "loss": 0.9191, + "step": 6460 + }, + { + "epoch": 1.34, + "learning_rate": 1.4663461153138644e-06, + "loss": 0.9692, + "step": 6461 + }, + { + "epoch": 1.34, + "learning_rate": 1.4655010249574648e-06, + "loss": 0.8001, + "step": 6462 + }, + { + "epoch": 1.34, + "learning_rate": 1.4646560994828704e-06, + "loss": 0.9268, + "step": 6463 + }, + { + "epoch": 1.34, + "learning_rate": 1.46381133898087e-06, + "loss": 0.922, + "step": 6464 + }, + { + "epoch": 1.34, + "learning_rate": 1.4629667435422302e-06, + "loss": 0.8238, + "step": 6465 + }, + { + "epoch": 1.34, + "learning_rate": 1.4621223132577044e-06, + "loss": 0.7994, + "step": 6466 + }, + { + "epoch": 1.35, + "learning_rate": 1.4612780482180262e-06, + "loss": 0.8768, + "step": 6467 + }, + { + "epoch": 1.35, + "learning_rate": 1.4604339485139128e-06, + "loss": 0.9263, + "step": 6468 + }, + { + "epoch": 1.35, + "learning_rate": 1.459590014236061e-06, + "loss": 1.0007, + "step": 6469 + }, + { + "epoch": 1.35, + "learning_rate": 1.4587462454751519e-06, + "loss": 1.1394, + "step": 6470 + }, + { + "epoch": 1.35, + "learning_rate": 1.457902642321849e-06, + "loss": 0.8521, + "step": 6471 + }, + { + "epoch": 1.35, + "learning_rate": 1.4570592048667983e-06, + "loss": 0.9581, + "step": 6472 + }, + { + "epoch": 1.35, + "learning_rate": 1.4562159332006246e-06, + "loss": 0.9183, + "step": 6473 + }, + { + "epoch": 1.35, + "learning_rate": 1.4553728274139392e-06, + "loss": 0.8707, + "step": 6474 + }, + { + "epoch": 1.35, + "learning_rate": 1.4545298875973328e-06, + "loss": 1.0591, + "step": 6475 + }, + { + "epoch": 1.35, + "learning_rate": 1.453687113841381e-06, + "loss": 1.0025, + "step": 6476 + }, + { + "epoch": 1.35, + "learning_rate": 1.452844506236637e-06, + "loss": 0.7434, + "step": 6477 + }, + { + "epoch": 1.35, + "learning_rate": 1.4520020648736407e-06, + "loss": 0.812, + "step": 6478 + }, + { + "epoch": 1.35, + "learning_rate": 1.451159789842912e-06, + "loss": 0.8022, + "step": 6479 + }, + { + "epoch": 1.35, + "learning_rate": 1.4503176812349546e-06, + "loss": 0.9982, + "step": 6480 + }, + { + "epoch": 1.35, + "learning_rate": 1.4494757391402507e-06, + "loss": 0.8646, + "step": 6481 + }, + { + "epoch": 1.35, + "learning_rate": 1.4486339636492677e-06, + "loss": 1.0913, + "step": 6482 + }, + { + "epoch": 1.35, + "learning_rate": 1.4477923548524547e-06, + "loss": 0.8986, + "step": 6483 + }, + { + "epoch": 1.35, + "learning_rate": 1.4469509128402437e-06, + "loss": 0.8862, + "step": 6484 + }, + { + "epoch": 1.35, + "learning_rate": 1.4461096377030453e-06, + "loss": 0.9357, + "step": 6485 + }, + { + "epoch": 1.35, + "learning_rate": 1.445268529531255e-06, + "loss": 0.7819, + "step": 6486 + }, + { + "epoch": 1.35, + "learning_rate": 1.4444275884152505e-06, + "loss": 0.7505, + "step": 6487 + }, + { + "epoch": 1.35, + "learning_rate": 1.4435868144453918e-06, + "loss": 0.97, + "step": 6488 + }, + { + "epoch": 1.35, + "learning_rate": 1.4427462077120163e-06, + "loss": 0.8285, + "step": 6489 + }, + { + "epoch": 1.35, + "learning_rate": 1.4419057683054524e-06, + "loss": 0.8555, + "step": 6490 + }, + { + "epoch": 1.35, + "learning_rate": 1.441065496316001e-06, + "loss": 0.7912, + "step": 6491 + }, + { + "epoch": 1.35, + "learning_rate": 1.440225391833952e-06, + "loss": 0.7341, + "step": 6492 + }, + { + "epoch": 1.35, + "learning_rate": 1.439385454949571e-06, + "loss": 1.0783, + "step": 6493 + }, + { + "epoch": 1.35, + "learning_rate": 1.4385456857531138e-06, + "loss": 0.8421, + "step": 6494 + }, + { + "epoch": 1.35, + "learning_rate": 1.4377060843348102e-06, + "loss": 0.7591, + "step": 6495 + }, + { + "epoch": 1.35, + "learning_rate": 1.4368666507848776e-06, + "loss": 0.9004, + "step": 6496 + }, + { + "epoch": 1.35, + "learning_rate": 1.4360273851935094e-06, + "loss": 0.9518, + "step": 6497 + }, + { + "epoch": 1.35, + "learning_rate": 1.4351882876508893e-06, + "loss": 0.7135, + "step": 6498 + }, + { + "epoch": 1.35, + "learning_rate": 1.4343493582471747e-06, + "loss": 1.0702, + "step": 6499 + }, + { + "epoch": 1.35, + "learning_rate": 1.4335105970725112e-06, + "loss": 0.9213, + "step": 6500 + }, + { + "epoch": 1.35, + "learning_rate": 1.43267200421702e-06, + "loss": 1.2515, + "step": 6501 + }, + { + "epoch": 1.35, + "learning_rate": 1.4318335797708126e-06, + "loss": 0.6504, + "step": 6502 + }, + { + "epoch": 1.35, + "learning_rate": 1.4309953238239736e-06, + "loss": 0.9165, + "step": 6503 + }, + { + "epoch": 1.35, + "learning_rate": 1.4301572364665764e-06, + "loss": 0.9404, + "step": 6504 + }, + { + "epoch": 1.35, + "learning_rate": 1.42931931778867e-06, + "loss": 0.7413, + "step": 6505 + }, + { + "epoch": 1.35, + "learning_rate": 1.4284815678802927e-06, + "loss": 0.8186, + "step": 6506 + }, + { + "epoch": 1.35, + "learning_rate": 1.427643986831458e-06, + "loss": 0.8692, + "step": 6507 + }, + { + "epoch": 1.35, + "learning_rate": 1.4268065747321657e-06, + "loss": 0.8415, + "step": 6508 + }, + { + "epoch": 1.35, + "learning_rate": 1.4259693316723928e-06, + "loss": 0.7132, + "step": 6509 + }, + { + "epoch": 1.35, + "learning_rate": 1.4251322577421048e-06, + "loss": 0.8842, + "step": 6510 + }, + { + "epoch": 1.35, + "learning_rate": 1.424295353031242e-06, + "loss": 0.8095, + "step": 6511 + }, + { + "epoch": 1.35, + "learning_rate": 1.4234586176297327e-06, + "loss": 0.8069, + "step": 6512 + }, + { + "epoch": 1.35, + "learning_rate": 1.4226220516274797e-06, + "loss": 1.0709, + "step": 6513 + }, + { + "epoch": 1.35, + "learning_rate": 1.4217856551143772e-06, + "loss": 0.919, + "step": 6514 + }, + { + "epoch": 1.36, + "learning_rate": 1.4209494281802922e-06, + "loss": 0.8702, + "step": 6515 + }, + { + "epoch": 1.36, + "learning_rate": 1.4201133709150797e-06, + "loss": 0.8902, + "step": 6516 + }, + { + "epoch": 1.36, + "learning_rate": 1.4192774834085706e-06, + "loss": 0.9974, + "step": 6517 + }, + { + "epoch": 1.36, + "learning_rate": 1.418441765750585e-06, + "loss": 0.888, + "step": 6518 + }, + { + "epoch": 1.36, + "learning_rate": 1.4176062180309179e-06, + "loss": 1.0993, + "step": 6519 + }, + { + "epoch": 1.36, + "learning_rate": 1.416770840339351e-06, + "loss": 0.9159, + "step": 6520 + }, + { + "epoch": 1.36, + "learning_rate": 1.4159356327656421e-06, + "loss": 0.9144, + "step": 6521 + }, + { + "epoch": 1.36, + "learning_rate": 1.4151005953995384e-06, + "loss": 0.8711, + "step": 6522 + }, + { + "epoch": 1.36, + "learning_rate": 1.4142657283307616e-06, + "loss": 0.9718, + "step": 6523 + }, + { + "epoch": 1.36, + "learning_rate": 1.4134310316490203e-06, + "loss": 1.1161, + "step": 6524 + }, + { + "epoch": 1.36, + "learning_rate": 1.4125965054439994e-06, + "loss": 0.817, + "step": 6525 + }, + { + "epoch": 1.36, + "learning_rate": 1.4117621498053725e-06, + "loss": 0.8939, + "step": 6526 + }, + { + "epoch": 1.36, + "learning_rate": 1.4109279648227887e-06, + "loss": 0.8062, + "step": 6527 + }, + { + "epoch": 1.36, + "learning_rate": 1.4100939505858822e-06, + "loss": 0.9737, + "step": 6528 + }, + { + "epoch": 1.36, + "learning_rate": 1.4092601071842655e-06, + "loss": 1.1303, + "step": 6529 + }, + { + "epoch": 1.36, + "learning_rate": 1.4084264347075388e-06, + "loss": 0.71, + "step": 6530 + }, + { + "epoch": 1.36, + "learning_rate": 1.4075929332452772e-06, + "loss": 0.7517, + "step": 6531 + }, + { + "epoch": 1.36, + "learning_rate": 1.4067596028870421e-06, + "loss": 1.0714, + "step": 6532 + }, + { + "epoch": 1.36, + "learning_rate": 1.405926443722372e-06, + "loss": 0.9655, + "step": 6533 + }, + { + "epoch": 1.36, + "learning_rate": 1.4050934558407939e-06, + "loss": 1.0585, + "step": 6534 + }, + { + "epoch": 1.36, + "learning_rate": 1.4042606393318087e-06, + "loss": 0.9255, + "step": 6535 + }, + { + "epoch": 1.36, + "learning_rate": 1.4034279942849056e-06, + "loss": 0.9949, + "step": 6536 + }, + { + "epoch": 1.36, + "learning_rate": 1.4025955207895482e-06, + "loss": 0.9094, + "step": 6537 + }, + { + "epoch": 1.36, + "learning_rate": 1.4017632189351903e-06, + "loss": 0.9864, + "step": 6538 + }, + { + "epoch": 1.36, + "learning_rate": 1.4009310888112593e-06, + "loss": 0.7722, + "step": 6539 + }, + { + "epoch": 1.36, + "learning_rate": 1.40009913050717e-06, + "loss": 1.1161, + "step": 6540 + }, + { + "epoch": 1.36, + "learning_rate": 1.3992673441123126e-06, + "loss": 0.8896, + "step": 6541 + }, + { + "epoch": 1.36, + "learning_rate": 1.398435729716067e-06, + "loss": 1.0284, + "step": 6542 + }, + { + "epoch": 1.36, + "learning_rate": 1.3976042874077868e-06, + "loss": 0.9105, + "step": 6543 + }, + { + "epoch": 1.36, + "learning_rate": 1.3967730172768129e-06, + "loss": 1.0152, + "step": 6544 + }, + { + "epoch": 1.36, + "learning_rate": 1.3959419194124614e-06, + "loss": 0.9502, + "step": 6545 + }, + { + "epoch": 1.36, + "learning_rate": 1.3951109939040386e-06, + "loss": 0.9993, + "step": 6546 + }, + { + "epoch": 1.36, + "learning_rate": 1.3942802408408238e-06, + "loss": 0.6866, + "step": 6547 + }, + { + "epoch": 1.36, + "learning_rate": 1.393449660312084e-06, + "loss": 0.9295, + "step": 6548 + }, + { + "epoch": 1.36, + "learning_rate": 1.3926192524070607e-06, + "loss": 0.8397, + "step": 6549 + }, + { + "epoch": 1.36, + "learning_rate": 1.391789017214987e-06, + "loss": 1.0773, + "step": 6550 + }, + { + "epoch": 1.36, + "learning_rate": 1.390958954825067e-06, + "loss": 0.992, + "step": 6551 + }, + { + "epoch": 1.36, + "learning_rate": 1.3901290653264938e-06, + "loss": 0.7448, + "step": 6552 + }, + { + "epoch": 1.36, + "learning_rate": 1.3892993488084353e-06, + "loss": 0.9002, + "step": 6553 + }, + { + "epoch": 1.36, + "learning_rate": 1.388469805360049e-06, + "loss": 0.8622, + "step": 6554 + }, + { + "epoch": 1.36, + "learning_rate": 1.387640435070466e-06, + "loss": 0.8931, + "step": 6555 + }, + { + "epoch": 1.36, + "learning_rate": 1.3868112380288035e-06, + "loss": 0.6334, + "step": 6556 + }, + { + "epoch": 1.36, + "learning_rate": 1.3859822143241582e-06, + "loss": 0.948, + "step": 6557 + }, + { + "epoch": 1.36, + "learning_rate": 1.3851533640456097e-06, + "loss": 0.8655, + "step": 6558 + }, + { + "epoch": 1.36, + "learning_rate": 1.384324687282216e-06, + "loss": 1.0403, + "step": 6559 + }, + { + "epoch": 1.36, + "learning_rate": 1.3834961841230196e-06, + "loss": 0.8378, + "step": 6560 + }, + { + "epoch": 1.36, + "learning_rate": 1.3826678546570426e-06, + "loss": 0.9306, + "step": 6561 + }, + { + "epoch": 1.36, + "learning_rate": 1.3818396989732905e-06, + "loss": 0.9944, + "step": 6562 + }, + { + "epoch": 1.37, + "learning_rate": 1.3810117171607461e-06, + "loss": 0.9046, + "step": 6563 + }, + { + "epoch": 1.37, + "learning_rate": 1.380183909308377e-06, + "loss": 0.9955, + "step": 6564 + }, + { + "epoch": 1.37, + "learning_rate": 1.3793562755051315e-06, + "loss": 0.87, + "step": 6565 + }, + { + "epoch": 1.37, + "learning_rate": 1.3785288158399397e-06, + "loss": 0.9818, + "step": 6566 + }, + { + "epoch": 1.37, + "learning_rate": 1.3777015304017096e-06, + "loss": 1.1803, + "step": 6567 + }, + { + "epoch": 1.37, + "learning_rate": 1.3768744192793343e-06, + "loss": 1.0755, + "step": 6568 + }, + { + "epoch": 1.37, + "learning_rate": 1.376047482561687e-06, + "loss": 0.9743, + "step": 6569 + }, + { + "epoch": 1.37, + "learning_rate": 1.3752207203376229e-06, + "loss": 0.8801, + "step": 6570 + }, + { + "epoch": 1.37, + "learning_rate": 1.374394132695975e-06, + "loss": 0.7723, + "step": 6571 + }, + { + "epoch": 1.37, + "learning_rate": 1.3735677197255618e-06, + "loss": 0.7913, + "step": 6572 + }, + { + "epoch": 1.37, + "learning_rate": 1.3727414815151811e-06, + "loss": 0.9684, + "step": 6573 + }, + { + "epoch": 1.37, + "learning_rate": 1.3719154181536134e-06, + "loss": 0.6843, + "step": 6574 + }, + { + "epoch": 1.37, + "learning_rate": 1.3710895297296163e-06, + "loss": 0.6859, + "step": 6575 + }, + { + "epoch": 1.37, + "learning_rate": 1.3702638163319332e-06, + "loss": 0.9306, + "step": 6576 + }, + { + "epoch": 1.37, + "learning_rate": 1.3694382780492866e-06, + "loss": 0.8895, + "step": 6577 + }, + { + "epoch": 1.37, + "learning_rate": 1.368612914970382e-06, + "loss": 0.9537, + "step": 6578 + }, + { + "epoch": 1.37, + "learning_rate": 1.3677877271839022e-06, + "loss": 0.8478, + "step": 6579 + }, + { + "epoch": 1.37, + "learning_rate": 1.3669627147785144e-06, + "loss": 0.8423, + "step": 6580 + }, + { + "epoch": 1.37, + "learning_rate": 1.3661378778428664e-06, + "loss": 0.7934, + "step": 6581 + }, + { + "epoch": 1.37, + "learning_rate": 1.3653132164655881e-06, + "loss": 0.7367, + "step": 6582 + }, + { + "epoch": 1.37, + "learning_rate": 1.3644887307352869e-06, + "loss": 0.9978, + "step": 6583 + }, + { + "epoch": 1.37, + "learning_rate": 1.3636644207405548e-06, + "loss": 1.1592, + "step": 6584 + }, + { + "epoch": 1.37, + "learning_rate": 1.362840286569964e-06, + "loss": 1.0023, + "step": 6585 + }, + { + "epoch": 1.37, + "learning_rate": 1.3620163283120687e-06, + "loss": 0.9426, + "step": 6586 + }, + { + "epoch": 1.37, + "learning_rate": 1.3611925460554009e-06, + "loss": 1.0317, + "step": 6587 + }, + { + "epoch": 1.37, + "learning_rate": 1.360368939888477e-06, + "loss": 0.7481, + "step": 6588 + }, + { + "epoch": 1.37, + "learning_rate": 1.3595455098997934e-06, + "loss": 0.7705, + "step": 6589 + }, + { + "epoch": 1.37, + "learning_rate": 1.3587222561778293e-06, + "loss": 0.9572, + "step": 6590 + }, + { + "epoch": 1.37, + "learning_rate": 1.3578991788110401e-06, + "loss": 0.8553, + "step": 6591 + }, + { + "epoch": 1.37, + "learning_rate": 1.357076277887867e-06, + "loss": 1.1864, + "step": 6592 + }, + { + "epoch": 1.37, + "learning_rate": 1.3562535534967308e-06, + "loss": 0.8468, + "step": 6593 + }, + { + "epoch": 1.37, + "learning_rate": 1.355431005726034e-06, + "loss": 0.6479, + "step": 6594 + }, + { + "epoch": 1.37, + "learning_rate": 1.3546086346641572e-06, + "loss": 0.8392, + "step": 6595 + }, + { + "epoch": 1.37, + "learning_rate": 1.3537864403994652e-06, + "loss": 0.8531, + "step": 6596 + }, + { + "epoch": 1.37, + "learning_rate": 1.3529644230203025e-06, + "loss": 0.8784, + "step": 6597 + }, + { + "epoch": 1.37, + "learning_rate": 1.3521425826149966e-06, + "loss": 0.8826, + "step": 6598 + }, + { + "epoch": 1.37, + "learning_rate": 1.3513209192718508e-06, + "loss": 1.1329, + "step": 6599 + }, + { + "epoch": 1.37, + "learning_rate": 1.350499433079155e-06, + "loss": 1.0777, + "step": 6600 + }, + { + "epoch": 1.37, + "learning_rate": 1.3496781241251773e-06, + "loss": 0.8636, + "step": 6601 + }, + { + "epoch": 1.37, + "learning_rate": 1.3488569924981688e-06, + "loss": 1.0466, + "step": 6602 + }, + { + "epoch": 1.37, + "learning_rate": 1.3480360382863572e-06, + "loss": 0.8856, + "step": 6603 + }, + { + "epoch": 1.37, + "learning_rate": 1.3472152615779555e-06, + "loss": 0.9092, + "step": 6604 + }, + { + "epoch": 1.37, + "learning_rate": 1.3463946624611561e-06, + "loss": 0.9311, + "step": 6605 + }, + { + "epoch": 1.37, + "learning_rate": 1.3455742410241332e-06, + "loss": 0.7164, + "step": 6606 + }, + { + "epoch": 1.37, + "learning_rate": 1.344753997355039e-06, + "loss": 0.8951, + "step": 6607 + }, + { + "epoch": 1.37, + "learning_rate": 1.3439339315420097e-06, + "loss": 0.9693, + "step": 6608 + }, + { + "epoch": 1.37, + "learning_rate": 1.3431140436731611e-06, + "loss": 0.9444, + "step": 6609 + }, + { + "epoch": 1.37, + "learning_rate": 1.3422943338365917e-06, + "loss": 0.7414, + "step": 6610 + }, + { + "epoch": 1.38, + "learning_rate": 1.3414748021203764e-06, + "loss": 1.0658, + "step": 6611 + }, + { + "epoch": 1.38, + "learning_rate": 1.3406554486125754e-06, + "loss": 1.1119, + "step": 6612 + }, + { + "epoch": 1.38, + "learning_rate": 1.3398362734012281e-06, + "loss": 0.9073, + "step": 6613 + }, + { + "epoch": 1.38, + "learning_rate": 1.3390172765743559e-06, + "loss": 0.7765, + "step": 6614 + }, + { + "epoch": 1.38, + "learning_rate": 1.3381984582199576e-06, + "loss": 0.994, + "step": 6615 + }, + { + "epoch": 1.38, + "learning_rate": 1.3373798184260162e-06, + "loss": 0.9415, + "step": 6616 + }, + { + "epoch": 1.38, + "learning_rate": 1.3365613572804947e-06, + "loss": 0.7317, + "step": 6617 + }, + { + "epoch": 1.38, + "learning_rate": 1.3357430748713383e-06, + "loss": 0.9485, + "step": 6618 + }, + { + "epoch": 1.38, + "learning_rate": 1.3349249712864682e-06, + "loss": 0.7774, + "step": 6619 + }, + { + "epoch": 1.38, + "learning_rate": 1.3341070466137912e-06, + "loss": 0.8216, + "step": 6620 + }, + { + "epoch": 1.38, + "learning_rate": 1.3332893009411942e-06, + "loss": 1.0826, + "step": 6621 + }, + { + "epoch": 1.38, + "learning_rate": 1.3324717343565417e-06, + "loss": 0.9731, + "step": 6622 + }, + { + "epoch": 1.38, + "learning_rate": 1.3316543469476824e-06, + "loss": 0.8743, + "step": 6623 + }, + { + "epoch": 1.38, + "learning_rate": 1.3308371388024448e-06, + "loss": 0.8553, + "step": 6624 + }, + { + "epoch": 1.38, + "learning_rate": 1.3300201100086381e-06, + "loss": 0.8233, + "step": 6625 + }, + { + "epoch": 1.38, + "learning_rate": 1.3292032606540506e-06, + "loss": 0.8455, + "step": 6626 + }, + { + "epoch": 1.38, + "learning_rate": 1.3283865908264538e-06, + "loss": 0.7132, + "step": 6627 + }, + { + "epoch": 1.38, + "learning_rate": 1.327570100613598e-06, + "loss": 0.7533, + "step": 6628 + }, + { + "epoch": 1.38, + "learning_rate": 1.3267537901032171e-06, + "loss": 0.7121, + "step": 6629 + }, + { + "epoch": 1.38, + "learning_rate": 1.3259376593830207e-06, + "loss": 0.854, + "step": 6630 + }, + { + "epoch": 1.38, + "learning_rate": 1.3251217085407033e-06, + "loss": 1.0314, + "step": 6631 + }, + { + "epoch": 1.38, + "learning_rate": 1.324305937663939e-06, + "loss": 0.8581, + "step": 6632 + }, + { + "epoch": 1.38, + "learning_rate": 1.323490346840383e-06, + "loss": 0.8971, + "step": 6633 + }, + { + "epoch": 1.38, + "learning_rate": 1.3226749361576686e-06, + "loss": 0.841, + "step": 6634 + }, + { + "epoch": 1.38, + "learning_rate": 1.3218597057034126e-06, + "loss": 0.7428, + "step": 6635 + }, + { + "epoch": 1.38, + "learning_rate": 1.3210446555652113e-06, + "loss": 0.9345, + "step": 6636 + }, + { + "epoch": 1.38, + "learning_rate": 1.3202297858306435e-06, + "loss": 1.1541, + "step": 6637 + }, + { + "epoch": 1.38, + "learning_rate": 1.319415096587264e-06, + "loss": 0.9749, + "step": 6638 + }, + { + "epoch": 1.38, + "learning_rate": 1.3186005879226123e-06, + "loss": 0.8167, + "step": 6639 + }, + { + "epoch": 1.38, + "learning_rate": 1.3177862599242077e-06, + "loss": 0.8852, + "step": 6640 + }, + { + "epoch": 1.38, + "learning_rate": 1.3169721126795503e-06, + "loss": 0.7853, + "step": 6641 + }, + { + "epoch": 1.38, + "learning_rate": 1.3161581462761183e-06, + "loss": 0.9683, + "step": 6642 + }, + { + "epoch": 1.38, + "learning_rate": 1.3153443608013733e-06, + "loss": 0.9565, + "step": 6643 + }, + { + "epoch": 1.38, + "learning_rate": 1.3145307563427568e-06, + "loss": 0.9808, + "step": 6644 + }, + { + "epoch": 1.38, + "learning_rate": 1.313717332987691e-06, + "loss": 1.0113, + "step": 6645 + }, + { + "epoch": 1.38, + "learning_rate": 1.3129040908235765e-06, + "loss": 0.8932, + "step": 6646 + }, + { + "epoch": 1.38, + "learning_rate": 1.312091029937797e-06, + "loss": 0.72, + "step": 6647 + }, + { + "epoch": 1.38, + "learning_rate": 1.3112781504177158e-06, + "loss": 0.7778, + "step": 6648 + }, + { + "epoch": 1.38, + "learning_rate": 1.310465452350678e-06, + "loss": 0.8635, + "step": 6649 + }, + { + "epoch": 1.38, + "learning_rate": 1.309652935824006e-06, + "loss": 0.8762, + "step": 6650 + }, + { + "epoch": 1.38, + "learning_rate": 1.3088406009250048e-06, + "loss": 1.0237, + "step": 6651 + }, + { + "epoch": 1.38, + "learning_rate": 1.3080284477409607e-06, + "loss": 0.9349, + "step": 6652 + }, + { + "epoch": 1.38, + "learning_rate": 1.3072164763591403e-06, + "loss": 1.0618, + "step": 6653 + }, + { + "epoch": 1.38, + "learning_rate": 1.3064046868667876e-06, + "loss": 0.9973, + "step": 6654 + }, + { + "epoch": 1.38, + "learning_rate": 1.3055930793511306e-06, + "loss": 0.8545, + "step": 6655 + }, + { + "epoch": 1.38, + "learning_rate": 1.3047816538993758e-06, + "loss": 0.8367, + "step": 6656 + }, + { + "epoch": 1.38, + "learning_rate": 1.303970410598713e-06, + "loss": 0.9513, + "step": 6657 + }, + { + "epoch": 1.38, + "learning_rate": 1.3031593495363073e-06, + "loss": 1.0338, + "step": 6658 + }, + { + "epoch": 1.38, + "learning_rate": 1.302348470799308e-06, + "loss": 0.9612, + "step": 6659 + }, + { + "epoch": 1.39, + "learning_rate": 1.3015377744748447e-06, + "loss": 0.7491, + "step": 6660 + }, + { + "epoch": 1.39, + "learning_rate": 1.3007272606500274e-06, + "loss": 0.8788, + "step": 6661 + }, + { + "epoch": 1.39, + "learning_rate": 1.2999169294119438e-06, + "loss": 0.8427, + "step": 6662 + }, + { + "epoch": 1.39, + "learning_rate": 1.2991067808476646e-06, + "loss": 0.9055, + "step": 6663 + }, + { + "epoch": 1.39, + "learning_rate": 1.2982968150442402e-06, + "loss": 0.8682, + "step": 6664 + }, + { + "epoch": 1.39, + "learning_rate": 1.2974870320887028e-06, + "loss": 0.7889, + "step": 6665 + }, + { + "epoch": 1.39, + "learning_rate": 1.2966774320680615e-06, + "loss": 0.8446, + "step": 6666 + }, + { + "epoch": 1.39, + "learning_rate": 1.2958680150693085e-06, + "loss": 1.0341, + "step": 6667 + }, + { + "epoch": 1.39, + "learning_rate": 1.2950587811794155e-06, + "loss": 0.8566, + "step": 6668 + }, + { + "epoch": 1.39, + "learning_rate": 1.2942497304853361e-06, + "loss": 0.8221, + "step": 6669 + }, + { + "epoch": 1.39, + "learning_rate": 1.2934408630740003e-06, + "loss": 1.1999, + "step": 6670 + }, + { + "epoch": 1.39, + "learning_rate": 1.292632179032322e-06, + "loss": 0.9019, + "step": 6671 + }, + { + "epoch": 1.39, + "learning_rate": 1.291823678447194e-06, + "loss": 0.911, + "step": 6672 + }, + { + "epoch": 1.39, + "learning_rate": 1.2910153614054911e-06, + "loss": 0.8821, + "step": 6673 + }, + { + "epoch": 1.39, + "learning_rate": 1.2902072279940645e-06, + "loss": 0.9117, + "step": 6674 + }, + { + "epoch": 1.39, + "learning_rate": 1.2893992782997493e-06, + "loss": 1.0332, + "step": 6675 + }, + { + "epoch": 1.39, + "learning_rate": 1.2885915124093598e-06, + "loss": 0.9276, + "step": 6676 + }, + { + "epoch": 1.39, + "learning_rate": 1.287783930409691e-06, + "loss": 0.9504, + "step": 6677 + }, + { + "epoch": 1.39, + "learning_rate": 1.2869765323875156e-06, + "loss": 0.8496, + "step": 6678 + }, + { + "epoch": 1.39, + "learning_rate": 1.2861693184295896e-06, + "loss": 0.857, + "step": 6679 + }, + { + "epoch": 1.39, + "learning_rate": 1.2853622886226482e-06, + "loss": 1.0342, + "step": 6680 + }, + { + "epoch": 1.39, + "learning_rate": 1.2845554430534075e-06, + "loss": 0.8265, + "step": 6681 + }, + { + "epoch": 1.39, + "learning_rate": 1.2837487818085608e-06, + "loss": 0.9281, + "step": 6682 + }, + { + "epoch": 1.39, + "learning_rate": 1.2829423049747852e-06, + "loss": 1.0113, + "step": 6683 + }, + { + "epoch": 1.39, + "learning_rate": 1.2821360126387365e-06, + "loss": 1.0332, + "step": 6684 + }, + { + "epoch": 1.39, + "learning_rate": 1.2813299048870514e-06, + "loss": 0.9541, + "step": 6685 + }, + { + "epoch": 1.39, + "learning_rate": 1.2805239818063446e-06, + "loss": 0.9614, + "step": 6686 + }, + { + "epoch": 1.39, + "learning_rate": 1.279718243483213e-06, + "loss": 0.9916, + "step": 6687 + }, + { + "epoch": 1.39, + "learning_rate": 1.2789126900042335e-06, + "loss": 0.95, + "step": 6688 + }, + { + "epoch": 1.39, + "learning_rate": 1.278107321455964e-06, + "loss": 0.9629, + "step": 6689 + }, + { + "epoch": 1.39, + "learning_rate": 1.2773021379249374e-06, + "loss": 0.9307, + "step": 6690 + }, + { + "epoch": 1.39, + "learning_rate": 1.2764971394976756e-06, + "loss": 0.87, + "step": 6691 + }, + { + "epoch": 1.39, + "learning_rate": 1.2756923262606719e-06, + "loss": 0.6714, + "step": 6692 + }, + { + "epoch": 1.39, + "learning_rate": 1.2748876983004059e-06, + "loss": 0.9719, + "step": 6693 + }, + { + "epoch": 1.39, + "learning_rate": 1.2740832557033314e-06, + "loss": 1.0875, + "step": 6694 + }, + { + "epoch": 1.39, + "learning_rate": 1.27327899855589e-06, + "loss": 0.7716, + "step": 6695 + }, + { + "epoch": 1.39, + "learning_rate": 1.272474926944496e-06, + "loss": 0.6669, + "step": 6696 + }, + { + "epoch": 1.39, + "learning_rate": 1.2716710409555487e-06, + "loss": 0.8407, + "step": 6697 + }, + { + "epoch": 1.39, + "learning_rate": 1.2708673406754229e-06, + "loss": 1.0069, + "step": 6698 + }, + { + "epoch": 1.39, + "learning_rate": 1.27006382619048e-06, + "loss": 0.9753, + "step": 6699 + }, + { + "epoch": 1.39, + "learning_rate": 1.269260497587054e-06, + "loss": 0.9722, + "step": 6700 + }, + { + "epoch": 1.39, + "learning_rate": 1.2684573549514657e-06, + "loss": 0.7055, + "step": 6701 + }, + { + "epoch": 1.39, + "learning_rate": 1.2676543983700089e-06, + "loss": 0.9855, + "step": 6702 + }, + { + "epoch": 1.39, + "learning_rate": 1.266851627928965e-06, + "loss": 0.9214, + "step": 6703 + }, + { + "epoch": 1.39, + "learning_rate": 1.2660490437145893e-06, + "loss": 0.9817, + "step": 6704 + }, + { + "epoch": 1.39, + "learning_rate": 1.2652466458131212e-06, + "loss": 0.8992, + "step": 6705 + }, + { + "epoch": 1.39, + "learning_rate": 1.264444434310775e-06, + "loss": 0.8613, + "step": 6706 + }, + { + "epoch": 1.39, + "learning_rate": 1.2636424092937528e-06, + "loss": 0.9161, + "step": 6707 + }, + { + "epoch": 1.4, + "learning_rate": 1.2628405708482285e-06, + "loss": 0.7327, + "step": 6708 + }, + { + "epoch": 1.4, + "learning_rate": 1.262038919060362e-06, + "loss": 0.5397, + "step": 6709 + }, + { + "epoch": 1.4, + "learning_rate": 1.2612374540162878e-06, + "loss": 1.0688, + "step": 6710 + }, + { + "epoch": 1.4, + "learning_rate": 1.260436175802127e-06, + "loss": 0.9075, + "step": 6711 + }, + { + "epoch": 1.4, + "learning_rate": 1.259635084503974e-06, + "loss": 0.9346, + "step": 6712 + }, + { + "epoch": 1.4, + "learning_rate": 1.258834180207908e-06, + "loss": 0.8958, + "step": 6713 + }, + { + "epoch": 1.4, + "learning_rate": 1.2580334629999826e-06, + "loss": 0.8132, + "step": 6714 + }, + { + "epoch": 1.4, + "learning_rate": 1.25723293296624e-06, + "loss": 0.8626, + "step": 6715 + }, + { + "epoch": 1.4, + "learning_rate": 1.256432590192693e-06, + "loss": 0.7124, + "step": 6716 + }, + { + "epoch": 1.4, + "learning_rate": 1.2556324347653409e-06, + "loss": 0.9198, + "step": 6717 + }, + { + "epoch": 1.4, + "learning_rate": 1.2548324667701572e-06, + "loss": 0.8463, + "step": 6718 + }, + { + "epoch": 1.4, + "learning_rate": 1.2540326862931024e-06, + "loss": 0.7562, + "step": 6719 + }, + { + "epoch": 1.4, + "learning_rate": 1.2532330934201095e-06, + "loss": 0.7193, + "step": 6720 + }, + { + "epoch": 1.4, + "learning_rate": 1.2524336882370975e-06, + "loss": 1.1189, + "step": 6721 + }, + { + "epoch": 1.4, + "learning_rate": 1.2516344708299588e-06, + "loss": 0.7992, + "step": 6722 + }, + { + "epoch": 1.4, + "learning_rate": 1.2508354412845735e-06, + "loss": 0.9314, + "step": 6723 + }, + { + "epoch": 1.4, + "learning_rate": 1.2500365996867944e-06, + "loss": 0.8013, + "step": 6724 + }, + { + "epoch": 1.4, + "learning_rate": 1.249237946122459e-06, + "loss": 1.1225, + "step": 6725 + }, + { + "epoch": 1.4, + "learning_rate": 1.2484394806773789e-06, + "loss": 1.1243, + "step": 6726 + }, + { + "epoch": 1.4, + "learning_rate": 1.2476412034373536e-06, + "loss": 0.8424, + "step": 6727 + }, + { + "epoch": 1.4, + "learning_rate": 1.2468431144881548e-06, + "loss": 0.8411, + "step": 6728 + }, + { + "epoch": 1.4, + "learning_rate": 1.24604521391554e-06, + "loss": 0.7648, + "step": 6729 + }, + { + "epoch": 1.4, + "learning_rate": 1.245247501805239e-06, + "loss": 0.8139, + "step": 6730 + }, + { + "epoch": 1.4, + "learning_rate": 1.2444499782429711e-06, + "loss": 0.9499, + "step": 6731 + }, + { + "epoch": 1.4, + "learning_rate": 1.2436526433144266e-06, + "loss": 0.9052, + "step": 6732 + }, + { + "epoch": 1.4, + "learning_rate": 1.2428554971052818e-06, + "loss": 1.0495, + "step": 6733 + }, + { + "epoch": 1.4, + "learning_rate": 1.2420585397011857e-06, + "loss": 0.8181, + "step": 6734 + }, + { + "epoch": 1.4, + "learning_rate": 1.2412617711877764e-06, + "loss": 0.8947, + "step": 6735 + }, + { + "epoch": 1.4, + "learning_rate": 1.240465191650663e-06, + "loss": 0.8688, + "step": 6736 + }, + { + "epoch": 1.4, + "learning_rate": 1.2396688011754406e-06, + "loss": 1.0235, + "step": 6737 + }, + { + "epoch": 1.4, + "learning_rate": 1.2388725998476777e-06, + "loss": 0.808, + "step": 6738 + }, + { + "epoch": 1.4, + "learning_rate": 1.2380765877529305e-06, + "loss": 0.9042, + "step": 6739 + }, + { + "epoch": 1.4, + "learning_rate": 1.237280764976727e-06, + "loss": 0.7171, + "step": 6740 + }, + { + "epoch": 1.4, + "learning_rate": 1.2364851316045805e-06, + "loss": 0.8332, + "step": 6741 + }, + { + "epoch": 1.4, + "learning_rate": 1.2356896877219787e-06, + "loss": 0.8975, + "step": 6742 + }, + { + "epoch": 1.4, + "learning_rate": 1.2348944334143965e-06, + "loss": 0.8839, + "step": 6743 + }, + { + "epoch": 1.4, + "learning_rate": 1.23409936876728e-06, + "loss": 0.715, + "step": 6744 + }, + { + "epoch": 1.4, + "learning_rate": 1.2333044938660613e-06, + "loss": 0.8983, + "step": 6745 + }, + { + "epoch": 1.4, + "learning_rate": 1.2325098087961469e-06, + "loss": 0.9733, + "step": 6746 + }, + { + "epoch": 1.4, + "learning_rate": 1.2317153136429293e-06, + "loss": 0.9586, + "step": 6747 + }, + { + "epoch": 1.4, + "learning_rate": 1.2309210084917743e-06, + "loss": 0.9427, + "step": 6748 + }, + { + "epoch": 1.4, + "learning_rate": 1.2301268934280317e-06, + "loss": 0.8111, + "step": 6749 + }, + { + "epoch": 1.4, + "learning_rate": 1.2293329685370256e-06, + "loss": 1.0365, + "step": 6750 + }, + { + "epoch": 1.4, + "learning_rate": 1.2285392339040682e-06, + "loss": 0.9636, + "step": 6751 + }, + { + "epoch": 1.4, + "learning_rate": 1.2277456896144425e-06, + "loss": 0.925, + "step": 6752 + }, + { + "epoch": 1.4, + "learning_rate": 1.2269523357534168e-06, + "loss": 0.9947, + "step": 6753 + }, + { + "epoch": 1.4, + "learning_rate": 1.2261591724062344e-06, + "loss": 0.9895, + "step": 6754 + }, + { + "epoch": 1.4, + "learning_rate": 1.2253661996581243e-06, + "loss": 0.6711, + "step": 6755 + }, + { + "epoch": 1.41, + "learning_rate": 1.2245734175942884e-06, + "loss": 1.0295, + "step": 6756 + }, + { + "epoch": 1.41, + "learning_rate": 1.2237808262999119e-06, + "loss": 0.9985, + "step": 6757 + }, + { + "epoch": 1.41, + "learning_rate": 1.222988425860159e-06, + "loss": 0.7907, + "step": 6758 + }, + { + "epoch": 1.41, + "learning_rate": 1.2221962163601742e-06, + "loss": 0.7911, + "step": 6759 + }, + { + "epoch": 1.41, + "learning_rate": 1.2214041978850776e-06, + "loss": 1.0396, + "step": 6760 + }, + { + "epoch": 1.41, + "learning_rate": 1.2206123705199732e-06, + "loss": 0.8043, + "step": 6761 + }, + { + "epoch": 1.41, + "learning_rate": 1.219820734349943e-06, + "loss": 0.8628, + "step": 6762 + }, + { + "epoch": 1.41, + "learning_rate": 1.2190292894600483e-06, + "loss": 0.9498, + "step": 6763 + }, + { + "epoch": 1.41, + "learning_rate": 1.2182380359353288e-06, + "loss": 0.9521, + "step": 6764 + }, + { + "epoch": 1.41, + "learning_rate": 1.2174469738608049e-06, + "loss": 0.7847, + "step": 6765 + }, + { + "epoch": 1.41, + "learning_rate": 1.2166561033214766e-06, + "loss": 0.8683, + "step": 6766 + }, + { + "epoch": 1.41, + "learning_rate": 1.2158654244023237e-06, + "loss": 0.9259, + "step": 6767 + }, + { + "epoch": 1.41, + "learning_rate": 1.2150749371883025e-06, + "loss": 0.9482, + "step": 6768 + }, + { + "epoch": 1.41, + "learning_rate": 1.214284641764352e-06, + "loss": 1.0697, + "step": 6769 + }, + { + "epoch": 1.41, + "learning_rate": 1.2134945382153895e-06, + "loss": 1.0602, + "step": 6770 + }, + { + "epoch": 1.41, + "learning_rate": 1.2127046266263121e-06, + "loss": 0.9454, + "step": 6771 + }, + { + "epoch": 1.41, + "learning_rate": 1.2119149070819942e-06, + "loss": 0.9329, + "step": 6772 + }, + { + "epoch": 1.41, + "learning_rate": 1.211125379667292e-06, + "loss": 1.0559, + "step": 6773 + }, + { + "epoch": 1.41, + "learning_rate": 1.21033604446704e-06, + "loss": 1.047, + "step": 6774 + }, + { + "epoch": 1.41, + "learning_rate": 1.2095469015660533e-06, + "loss": 0.84, + "step": 6775 + }, + { + "epoch": 1.41, + "learning_rate": 1.2087579510491235e-06, + "loss": 0.9485, + "step": 6776 + }, + { + "epoch": 1.41, + "learning_rate": 1.2079691930010237e-06, + "loss": 0.9264, + "step": 6777 + }, + { + "epoch": 1.41, + "learning_rate": 1.2071806275065062e-06, + "loss": 0.8456, + "step": 6778 + }, + { + "epoch": 1.41, + "learning_rate": 1.2063922546503037e-06, + "loss": 1.0153, + "step": 6779 + }, + { + "epoch": 1.41, + "learning_rate": 1.2056040745171245e-06, + "loss": 0.798, + "step": 6780 + }, + { + "epoch": 1.41, + "learning_rate": 1.204816087191659e-06, + "loss": 0.9401, + "step": 6781 + }, + { + "epoch": 1.41, + "learning_rate": 1.204028292758577e-06, + "loss": 0.8624, + "step": 6782 + }, + { + "epoch": 1.41, + "learning_rate": 1.2032406913025279e-06, + "loss": 0.8841, + "step": 6783 + }, + { + "epoch": 1.41, + "learning_rate": 1.2024532829081371e-06, + "loss": 0.6948, + "step": 6784 + }, + { + "epoch": 1.41, + "learning_rate": 1.201666067660013e-06, + "loss": 0.8691, + "step": 6785 + }, + { + "epoch": 1.41, + "learning_rate": 1.2008790456427416e-06, + "loss": 0.9622, + "step": 6786 + }, + { + "epoch": 1.41, + "learning_rate": 1.2000922169408896e-06, + "loss": 0.7134, + "step": 6787 + }, + { + "epoch": 1.41, + "learning_rate": 1.1993055816389988e-06, + "loss": 0.7631, + "step": 6788 + }, + { + "epoch": 1.41, + "learning_rate": 1.1985191398215952e-06, + "loss": 0.9268, + "step": 6789 + }, + { + "epoch": 1.41, + "learning_rate": 1.1977328915731814e-06, + "loss": 0.7994, + "step": 6790 + }, + { + "epoch": 1.41, + "learning_rate": 1.1969468369782409e-06, + "loss": 0.9481, + "step": 6791 + }, + { + "epoch": 1.41, + "learning_rate": 1.1961609761212328e-06, + "loss": 0.6481, + "step": 6792 + }, + { + "epoch": 1.41, + "learning_rate": 1.1953753090865995e-06, + "loss": 0.8646, + "step": 6793 + }, + { + "epoch": 1.41, + "learning_rate": 1.19458983595876e-06, + "loss": 0.8032, + "step": 6794 + }, + { + "epoch": 1.41, + "learning_rate": 1.193804556822115e-06, + "loss": 0.6443, + "step": 6795 + }, + { + "epoch": 1.41, + "learning_rate": 1.1930194717610407e-06, + "loss": 0.7034, + "step": 6796 + }, + { + "epoch": 1.41, + "learning_rate": 1.1922345808598948e-06, + "loss": 1.1723, + "step": 6797 + }, + { + "epoch": 1.41, + "learning_rate": 1.1914498842030148e-06, + "loss": 0.8904, + "step": 6798 + }, + { + "epoch": 1.41, + "learning_rate": 1.190665381874716e-06, + "loss": 1.0699, + "step": 6799 + }, + { + "epoch": 1.41, + "learning_rate": 1.1898810739592922e-06, + "loss": 1.1285, + "step": 6800 + }, + { + "epoch": 1.41, + "learning_rate": 1.189096960541018e-06, + "loss": 0.9191, + "step": 6801 + }, + { + "epoch": 1.41, + "learning_rate": 1.1883130417041456e-06, + "loss": 0.8099, + "step": 6802 + }, + { + "epoch": 1.41, + "learning_rate": 1.1875293175329089e-06, + "loss": 0.8462, + "step": 6803 + }, + { + "epoch": 1.42, + "learning_rate": 1.1867457881115168e-06, + "loss": 0.9509, + "step": 6804 + }, + { + "epoch": 1.42, + "learning_rate": 1.1859624535241603e-06, + "loss": 1.0174, + "step": 6805 + }, + { + "epoch": 1.42, + "learning_rate": 1.1851793138550087e-06, + "loss": 0.8237, + "step": 6806 + }, + { + "epoch": 1.42, + "learning_rate": 1.1843963691882113e-06, + "loss": 0.8453, + "step": 6807 + }, + { + "epoch": 1.42, + "learning_rate": 1.1836136196078935e-06, + "loss": 0.9093, + "step": 6808 + }, + { + "epoch": 1.42, + "learning_rate": 1.1828310651981626e-06, + "loss": 0.9509, + "step": 6809 + }, + { + "epoch": 1.42, + "learning_rate": 1.1820487060431042e-06, + "loss": 0.8912, + "step": 6810 + }, + { + "epoch": 1.42, + "learning_rate": 1.1812665422267835e-06, + "loss": 0.799, + "step": 6811 + }, + { + "epoch": 1.42, + "learning_rate": 1.180484573833242e-06, + "loss": 0.8043, + "step": 6812 + }, + { + "epoch": 1.42, + "learning_rate": 1.1797028009465033e-06, + "loss": 1.039, + "step": 6813 + }, + { + "epoch": 1.42, + "learning_rate": 1.1789212236505688e-06, + "loss": 0.8663, + "step": 6814 + }, + { + "epoch": 1.42, + "learning_rate": 1.1781398420294199e-06, + "loss": 0.9469, + "step": 6815 + }, + { + "epoch": 1.42, + "learning_rate": 1.1773586561670138e-06, + "loss": 0.8427, + "step": 6816 + }, + { + "epoch": 1.42, + "learning_rate": 1.17657766614729e-06, + "loss": 0.8202, + "step": 6817 + }, + { + "epoch": 1.42, + "learning_rate": 1.1757968720541662e-06, + "loss": 0.8397, + "step": 6818 + }, + { + "epoch": 1.42, + "learning_rate": 1.175016273971539e-06, + "loss": 0.7284, + "step": 6819 + }, + { + "epoch": 1.42, + "learning_rate": 1.1742358719832821e-06, + "loss": 1.0247, + "step": 6820 + }, + { + "epoch": 1.42, + "learning_rate": 1.1734556661732502e-06, + "loss": 0.9489, + "step": 6821 + }, + { + "epoch": 1.42, + "learning_rate": 1.1726756566252772e-06, + "loss": 0.9965, + "step": 6822 + }, + { + "epoch": 1.42, + "learning_rate": 1.1718958434231747e-06, + "loss": 0.6961, + "step": 6823 + }, + { + "epoch": 1.42, + "learning_rate": 1.171116226650732e-06, + "loss": 0.7918, + "step": 6824 + }, + { + "epoch": 1.42, + "learning_rate": 1.1703368063917222e-06, + "loss": 0.9467, + "step": 6825 + }, + { + "epoch": 1.42, + "learning_rate": 1.1695575827298907e-06, + "loss": 0.9079, + "step": 6826 + }, + { + "epoch": 1.42, + "learning_rate": 1.1687785557489672e-06, + "loss": 0.8293, + "step": 6827 + }, + { + "epoch": 1.42, + "learning_rate": 1.1679997255326559e-06, + "loss": 0.851, + "step": 6828 + }, + { + "epoch": 1.42, + "learning_rate": 1.1672210921646449e-06, + "loss": 0.8853, + "step": 6829 + }, + { + "epoch": 1.42, + "learning_rate": 1.1664426557285957e-06, + "loss": 0.9385, + "step": 6830 + }, + { + "epoch": 1.42, + "learning_rate": 1.1656644163081536e-06, + "loss": 0.8966, + "step": 6831 + }, + { + "epoch": 1.42, + "learning_rate": 1.1648863739869364e-06, + "loss": 0.9809, + "step": 6832 + }, + { + "epoch": 1.42, + "learning_rate": 1.1641085288485502e-06, + "loss": 0.6787, + "step": 6833 + }, + { + "epoch": 1.42, + "learning_rate": 1.16333088097657e-06, + "loss": 0.8369, + "step": 6834 + }, + { + "epoch": 1.42, + "learning_rate": 1.162553430454557e-06, + "loss": 0.9212, + "step": 6835 + }, + { + "epoch": 1.42, + "learning_rate": 1.1617761773660446e-06, + "loss": 1.0998, + "step": 6836 + }, + { + "epoch": 1.42, + "learning_rate": 1.1609991217945523e-06, + "loss": 1.0333, + "step": 6837 + }, + { + "epoch": 1.42, + "learning_rate": 1.1602222638235728e-06, + "loss": 0.8484, + "step": 6838 + }, + { + "epoch": 1.42, + "learning_rate": 1.1594456035365806e-06, + "loss": 0.8359, + "step": 6839 + }, + { + "epoch": 1.42, + "learning_rate": 1.1586691410170247e-06, + "loss": 0.9133, + "step": 6840 + }, + { + "epoch": 1.42, + "learning_rate": 1.1578928763483406e-06, + "loss": 1.1417, + "step": 6841 + }, + { + "epoch": 1.42, + "learning_rate": 1.1571168096139343e-06, + "loss": 0.892, + "step": 6842 + }, + { + "epoch": 1.42, + "learning_rate": 1.1563409408971963e-06, + "loss": 0.9959, + "step": 6843 + }, + { + "epoch": 1.42, + "learning_rate": 1.1555652702814907e-06, + "loss": 1.1017, + "step": 6844 + }, + { + "epoch": 1.42, + "learning_rate": 1.1547897978501668e-06, + "loss": 0.8715, + "step": 6845 + }, + { + "epoch": 1.42, + "learning_rate": 1.1540145236865468e-06, + "loss": 0.9625, + "step": 6846 + }, + { + "epoch": 1.42, + "learning_rate": 1.1532394478739356e-06, + "loss": 0.9196, + "step": 6847 + }, + { + "epoch": 1.42, + "learning_rate": 1.1524645704956117e-06, + "loss": 1.011, + "step": 6848 + }, + { + "epoch": 1.42, + "learning_rate": 1.1516898916348402e-06, + "loss": 0.9545, + "step": 6849 + }, + { + "epoch": 1.42, + "learning_rate": 1.1509154113748565e-06, + "loss": 0.9104, + "step": 6850 + }, + { + "epoch": 1.42, + "learning_rate": 1.1501411297988813e-06, + "loss": 1.0047, + "step": 6851 + }, + { + "epoch": 1.43, + "learning_rate": 1.1493670469901075e-06, + "loss": 1.1566, + "step": 6852 + }, + { + "epoch": 1.43, + "learning_rate": 1.1485931630317146e-06, + "loss": 0.7519, + "step": 6853 + }, + { + "epoch": 1.43, + "learning_rate": 1.147819478006853e-06, + "loss": 0.8371, + "step": 6854 + }, + { + "epoch": 1.43, + "learning_rate": 1.1470459919986574e-06, + "loss": 0.9923, + "step": 6855 + }, + { + "epoch": 1.43, + "learning_rate": 1.1462727050902356e-06, + "loss": 0.7445, + "step": 6856 + }, + { + "epoch": 1.43, + "learning_rate": 1.1454996173646816e-06, + "loss": 0.9625, + "step": 6857 + }, + { + "epoch": 1.43, + "learning_rate": 1.1447267289050602e-06, + "loss": 0.9502, + "step": 6858 + }, + { + "epoch": 1.43, + "learning_rate": 1.1439540397944206e-06, + "loss": 0.8065, + "step": 6859 + }, + { + "epoch": 1.43, + "learning_rate": 1.1431815501157846e-06, + "loss": 0.6808, + "step": 6860 + }, + { + "epoch": 1.43, + "learning_rate": 1.1424092599521613e-06, + "loss": 1.0784, + "step": 6861 + }, + { + "epoch": 1.43, + "learning_rate": 1.1416371693865285e-06, + "loss": 1.032, + "step": 6862 + }, + { + "epoch": 1.43, + "learning_rate": 1.140865278501851e-06, + "loss": 0.7843, + "step": 6863 + }, + { + "epoch": 1.43, + "learning_rate": 1.1400935873810643e-06, + "loss": 0.9387, + "step": 6864 + }, + { + "epoch": 1.43, + "learning_rate": 1.1393220961070907e-06, + "loss": 0.9387, + "step": 6865 + }, + { + "epoch": 1.43, + "learning_rate": 1.1385508047628246e-06, + "loss": 0.9782, + "step": 6866 + }, + { + "epoch": 1.43, + "learning_rate": 1.1377797134311424e-06, + "loss": 1.192, + "step": 6867 + }, + { + "epoch": 1.43, + "learning_rate": 1.1370088221948954e-06, + "loss": 0.9467, + "step": 6868 + }, + { + "epoch": 1.43, + "learning_rate": 1.136238131136919e-06, + "loss": 0.8818, + "step": 6869 + }, + { + "epoch": 1.43, + "learning_rate": 1.135467640340022e-06, + "loss": 0.876, + "step": 6870 + }, + { + "epoch": 1.43, + "learning_rate": 1.1346973498869946e-06, + "loss": 0.7543, + "step": 6871 + }, + { + "epoch": 1.43, + "learning_rate": 1.133927259860602e-06, + "loss": 0.9252, + "step": 6872 + }, + { + "epoch": 1.43, + "learning_rate": 1.133157370343594e-06, + "loss": 0.9598, + "step": 6873 + }, + { + "epoch": 1.43, + "learning_rate": 1.1323876814186925e-06, + "loss": 0.8297, + "step": 6874 + }, + { + "epoch": 1.43, + "learning_rate": 1.1316181931686023e-06, + "loss": 0.8426, + "step": 6875 + }, + { + "epoch": 1.43, + "learning_rate": 1.130848905676002e-06, + "loss": 1.1608, + "step": 6876 + }, + { + "epoch": 1.43, + "learning_rate": 1.130079819023555e-06, + "loss": 0.9376, + "step": 6877 + }, + { + "epoch": 1.43, + "learning_rate": 1.1293109332938979e-06, + "loss": 0.8482, + "step": 6878 + }, + { + "epoch": 1.43, + "learning_rate": 1.1285422485696477e-06, + "loss": 0.8321, + "step": 6879 + }, + { + "epoch": 1.43, + "learning_rate": 1.1277737649333976e-06, + "loss": 0.9871, + "step": 6880 + }, + { + "epoch": 1.43, + "learning_rate": 1.127005482467725e-06, + "loss": 0.9057, + "step": 6881 + }, + { + "epoch": 1.43, + "learning_rate": 1.1262374012551786e-06, + "loss": 0.8588, + "step": 6882 + }, + { + "epoch": 1.43, + "learning_rate": 1.1254695213782906e-06, + "loss": 0.9413, + "step": 6883 + }, + { + "epoch": 1.43, + "learning_rate": 1.124701842919567e-06, + "loss": 0.8906, + "step": 6884 + }, + { + "epoch": 1.43, + "learning_rate": 1.1239343659614984e-06, + "loss": 0.8612, + "step": 6885 + }, + { + "epoch": 1.43, + "learning_rate": 1.1231670905865472e-06, + "loss": 1.0163, + "step": 6886 + }, + { + "epoch": 1.43, + "learning_rate": 1.1224000168771593e-06, + "loss": 0.9879, + "step": 6887 + }, + { + "epoch": 1.43, + "learning_rate": 1.1216331449157535e-06, + "loss": 0.9622, + "step": 6888 + }, + { + "epoch": 1.43, + "learning_rate": 1.1208664747847346e-06, + "loss": 0.8187, + "step": 6889 + }, + { + "epoch": 1.43, + "learning_rate": 1.1201000065664775e-06, + "loss": 0.9826, + "step": 6890 + }, + { + "epoch": 1.43, + "learning_rate": 1.1193337403433406e-06, + "loss": 0.9072, + "step": 6891 + }, + { + "epoch": 1.43, + "learning_rate": 1.1185676761976603e-06, + "loss": 0.8565, + "step": 6892 + }, + { + "epoch": 1.43, + "learning_rate": 1.1178018142117476e-06, + "loss": 0.9846, + "step": 6893 + }, + { + "epoch": 1.43, + "learning_rate": 1.117036154467896e-06, + "loss": 0.755, + "step": 6894 + }, + { + "epoch": 1.43, + "learning_rate": 1.1162706970483752e-06, + "loss": 0.6764, + "step": 6895 + }, + { + "epoch": 1.43, + "learning_rate": 1.1155054420354344e-06, + "loss": 0.9133, + "step": 6896 + }, + { + "epoch": 1.43, + "learning_rate": 1.1147403895112987e-06, + "loss": 0.8393, + "step": 6897 + }, + { + "epoch": 1.43, + "learning_rate": 1.1139755395581733e-06, + "loss": 1.0558, + "step": 6898 + }, + { + "epoch": 1.43, + "learning_rate": 1.1132108922582416e-06, + "loss": 0.6037, + "step": 6899 + }, + { + "epoch": 1.44, + "learning_rate": 1.112446447693666e-06, + "loss": 0.8971, + "step": 6900 + }, + { + "epoch": 1.44, + "learning_rate": 1.1116822059465837e-06, + "loss": 0.7729, + "step": 6901 + }, + { + "epoch": 1.44, + "learning_rate": 1.110918167099114e-06, + "loss": 0.6806, + "step": 6902 + }, + { + "epoch": 1.44, + "learning_rate": 1.1101543312333519e-06, + "loss": 0.922, + "step": 6903 + }, + { + "epoch": 1.44, + "learning_rate": 1.1093906984313738e-06, + "loss": 0.8411, + "step": 6904 + }, + { + "epoch": 1.44, + "learning_rate": 1.1086272687752288e-06, + "loss": 0.6663, + "step": 6905 + }, + { + "epoch": 1.44, + "learning_rate": 1.1078640423469485e-06, + "loss": 0.7835, + "step": 6906 + }, + { + "epoch": 1.44, + "learning_rate": 1.1071010192285421e-06, + "loss": 1.0212, + "step": 6907 + }, + { + "epoch": 1.44, + "learning_rate": 1.1063381995019973e-06, + "loss": 0.9635, + "step": 6908 + }, + { + "epoch": 1.44, + "learning_rate": 1.1055755832492768e-06, + "loss": 1.0293, + "step": 6909 + }, + { + "epoch": 1.44, + "learning_rate": 1.1048131705523247e-06, + "loss": 0.8336, + "step": 6910 + }, + { + "epoch": 1.44, + "learning_rate": 1.104050961493062e-06, + "loss": 0.8488, + "step": 6911 + }, + { + "epoch": 1.44, + "learning_rate": 1.1032889561533892e-06, + "loss": 0.9155, + "step": 6912 + }, + { + "epoch": 1.44, + "learning_rate": 1.1025271546151819e-06, + "loss": 0.954, + "step": 6913 + }, + { + "epoch": 1.44, + "learning_rate": 1.1017655569602963e-06, + "loss": 0.8606, + "step": 6914 + }, + { + "epoch": 1.44, + "learning_rate": 1.1010041632705663e-06, + "loss": 1.1361, + "step": 6915 + }, + { + "epoch": 1.44, + "learning_rate": 1.1002429736278043e-06, + "loss": 0.882, + "step": 6916 + }, + { + "epoch": 1.44, + "learning_rate": 1.0994819881137984e-06, + "loss": 0.7701, + "step": 6917 + }, + { + "epoch": 1.44, + "learning_rate": 1.098721206810317e-06, + "loss": 1.1854, + "step": 6918 + }, + { + "epoch": 1.44, + "learning_rate": 1.0979606297991063e-06, + "loss": 0.8696, + "step": 6919 + }, + { + "epoch": 1.44, + "learning_rate": 1.0972002571618911e-06, + "loss": 0.8657, + "step": 6920 + }, + { + "epoch": 1.44, + "learning_rate": 1.0964400889803718e-06, + "loss": 1.0656, + "step": 6921 + }, + { + "epoch": 1.44, + "learning_rate": 1.0956801253362288e-06, + "loss": 0.7732, + "step": 6922 + }, + { + "epoch": 1.44, + "learning_rate": 1.094920366311121e-06, + "loss": 1.0196, + "step": 6923 + }, + { + "epoch": 1.44, + "learning_rate": 1.0941608119866844e-06, + "loss": 1.2692, + "step": 6924 + }, + { + "epoch": 1.44, + "learning_rate": 1.0934014624445316e-06, + "loss": 1.0464, + "step": 6925 + }, + { + "epoch": 1.44, + "learning_rate": 1.0926423177662559e-06, + "loss": 0.8957, + "step": 6926 + }, + { + "epoch": 1.44, + "learning_rate": 1.0918833780334266e-06, + "loss": 0.8658, + "step": 6927 + }, + { + "epoch": 1.44, + "learning_rate": 1.0911246433275933e-06, + "loss": 1.0239, + "step": 6928 + }, + { + "epoch": 1.44, + "learning_rate": 1.09036611373028e-06, + "loss": 0.8621, + "step": 6929 + }, + { + "epoch": 1.44, + "learning_rate": 1.0896077893229916e-06, + "loss": 0.9479, + "step": 6930 + }, + { + "epoch": 1.44, + "learning_rate": 1.0888496701872096e-06, + "loss": 0.7468, + "step": 6931 + }, + { + "epoch": 1.44, + "learning_rate": 1.0880917564043953e-06, + "loss": 0.8467, + "step": 6932 + }, + { + "epoch": 1.44, + "learning_rate": 1.0873340480559844e-06, + "loss": 0.9431, + "step": 6933 + }, + { + "epoch": 1.44, + "learning_rate": 1.0865765452233938e-06, + "loss": 1.0123, + "step": 6934 + }, + { + "epoch": 1.44, + "learning_rate": 1.0858192479880164e-06, + "loss": 0.9207, + "step": 6935 + }, + { + "epoch": 1.44, + "learning_rate": 1.0850621564312255e-06, + "loss": 1.0186, + "step": 6936 + }, + { + "epoch": 1.44, + "learning_rate": 1.0843052706343682e-06, + "loss": 1.1213, + "step": 6937 + }, + { + "epoch": 1.44, + "learning_rate": 1.083548590678773e-06, + "loss": 0.8484, + "step": 6938 + }, + { + "epoch": 1.44, + "learning_rate": 1.0827921166457447e-06, + "loss": 0.8504, + "step": 6939 + }, + { + "epoch": 1.44, + "learning_rate": 1.0820358486165678e-06, + "loss": 0.8368, + "step": 6940 + }, + { + "epoch": 1.44, + "learning_rate": 1.081279786672501e-06, + "loss": 0.8847, + "step": 6941 + }, + { + "epoch": 1.44, + "learning_rate": 1.080523930894784e-06, + "loss": 0.9696, + "step": 6942 + }, + { + "epoch": 1.44, + "learning_rate": 1.0797682813646342e-06, + "loss": 0.8344, + "step": 6943 + }, + { + "epoch": 1.44, + "learning_rate": 1.0790128381632463e-06, + "loss": 1.098, + "step": 6944 + }, + { + "epoch": 1.44, + "learning_rate": 1.0782576013717909e-06, + "loss": 0.8419, + "step": 6945 + }, + { + "epoch": 1.44, + "learning_rate": 1.0775025710714186e-06, + "loss": 1.0648, + "step": 6946 + }, + { + "epoch": 1.44, + "learning_rate": 1.0767477473432585e-06, + "loss": 0.8576, + "step": 6947 + }, + { + "epoch": 1.45, + "learning_rate": 1.0759931302684165e-06, + "loss": 1.1398, + "step": 6948 + }, + { + "epoch": 1.45, + "learning_rate": 1.0752387199279744e-06, + "loss": 1.0138, + "step": 6949 + }, + { + "epoch": 1.45, + "learning_rate": 1.0744845164029943e-06, + "loss": 0.91, + "step": 6950 + }, + { + "epoch": 1.45, + "learning_rate": 1.0737305197745155e-06, + "loss": 0.9482, + "step": 6951 + }, + { + "epoch": 1.45, + "learning_rate": 1.0729767301235563e-06, + "loss": 1.0248, + "step": 6952 + }, + { + "epoch": 1.45, + "learning_rate": 1.0722231475311088e-06, + "loss": 0.8496, + "step": 6953 + }, + { + "epoch": 1.45, + "learning_rate": 1.0714697720781463e-06, + "loss": 0.9918, + "step": 6954 + }, + { + "epoch": 1.45, + "learning_rate": 1.0707166038456194e-06, + "loss": 1.0219, + "step": 6955 + }, + { + "epoch": 1.45, + "learning_rate": 1.0699636429144565e-06, + "loss": 1.1175, + "step": 6956 + }, + { + "epoch": 1.45, + "learning_rate": 1.0692108893655617e-06, + "loss": 0.8574, + "step": 6957 + }, + { + "epoch": 1.45, + "learning_rate": 1.068458343279819e-06, + "loss": 0.9708, + "step": 6958 + }, + { + "epoch": 1.45, + "learning_rate": 1.0677060047380894e-06, + "loss": 1.0254, + "step": 6959 + }, + { + "epoch": 1.45, + "learning_rate": 1.0669538738212128e-06, + "loss": 0.8846, + "step": 6960 + }, + { + "epoch": 1.45, + "learning_rate": 1.0662019506100034e-06, + "loss": 0.8671, + "step": 6961 + }, + { + "epoch": 1.45, + "learning_rate": 1.0654502351852565e-06, + "loss": 0.8968, + "step": 6962 + }, + { + "epoch": 1.45, + "learning_rate": 1.0646987276277438e-06, + "loss": 1.1522, + "step": 6963 + }, + { + "epoch": 1.45, + "learning_rate": 1.0639474280182156e-06, + "loss": 0.7665, + "step": 6964 + }, + { + "epoch": 1.45, + "learning_rate": 1.0631963364373973e-06, + "loss": 0.6807, + "step": 6965 + }, + { + "epoch": 1.45, + "learning_rate": 1.0624454529659942e-06, + "loss": 1.0461, + "step": 6966 + }, + { + "epoch": 1.45, + "learning_rate": 1.061694777684689e-06, + "loss": 0.8123, + "step": 6967 + }, + { + "epoch": 1.45, + "learning_rate": 1.0609443106741426e-06, + "loss": 0.7487, + "step": 6968 + }, + { + "epoch": 1.45, + "learning_rate": 1.060194052014991e-06, + "loss": 0.9206, + "step": 6969 + }, + { + "epoch": 1.45, + "learning_rate": 1.0594440017878495e-06, + "loss": 0.7423, + "step": 6970 + }, + { + "epoch": 1.45, + "learning_rate": 1.058694160073312e-06, + "loss": 0.9886, + "step": 6971 + }, + { + "epoch": 1.45, + "learning_rate": 1.0579445269519492e-06, + "loss": 0.7991, + "step": 6972 + }, + { + "epoch": 1.45, + "learning_rate": 1.0571951025043076e-06, + "loss": 1.005, + "step": 6973 + }, + { + "epoch": 1.45, + "learning_rate": 1.0564458868109135e-06, + "loss": 0.9329, + "step": 6974 + }, + { + "epoch": 1.45, + "learning_rate": 1.0556968799522701e-06, + "loss": 0.8669, + "step": 6975 + }, + { + "epoch": 1.45, + "learning_rate": 1.0549480820088595e-06, + "loss": 1.0039, + "step": 6976 + }, + { + "epoch": 1.45, + "learning_rate": 1.0541994930611373e-06, + "loss": 0.9639, + "step": 6977 + }, + { + "epoch": 1.45, + "learning_rate": 1.0534511131895412e-06, + "loss": 0.9464, + "step": 6978 + }, + { + "epoch": 1.45, + "learning_rate": 1.0527029424744837e-06, + "loss": 0.8774, + "step": 6979 + }, + { + "epoch": 1.45, + "learning_rate": 1.051954980996357e-06, + "loss": 0.9106, + "step": 6980 + }, + { + "epoch": 1.45, + "learning_rate": 1.0512072288355276e-06, + "loss": 0.9515, + "step": 6981 + }, + { + "epoch": 1.45, + "learning_rate": 1.0504596860723428e-06, + "loss": 1.0843, + "step": 6982 + }, + { + "epoch": 1.45, + "learning_rate": 1.0497123527871253e-06, + "loss": 0.962, + "step": 6983 + }, + { + "epoch": 1.45, + "learning_rate": 1.0489652290601773e-06, + "loss": 0.8703, + "step": 6984 + }, + { + "epoch": 1.45, + "learning_rate": 1.0482183149717754e-06, + "loss": 0.9278, + "step": 6985 + }, + { + "epoch": 1.45, + "learning_rate": 1.047471610602176e-06, + "loss": 0.8027, + "step": 6986 + }, + { + "epoch": 1.45, + "learning_rate": 1.046725116031613e-06, + "loss": 0.759, + "step": 6987 + }, + { + "epoch": 1.45, + "learning_rate": 1.045978831340298e-06, + "loss": 1.1695, + "step": 6988 + }, + { + "epoch": 1.45, + "learning_rate": 1.045232756608417e-06, + "loss": 0.7467, + "step": 6989 + }, + { + "epoch": 1.45, + "learning_rate": 1.0444868919161365e-06, + "loss": 0.9544, + "step": 6990 + }, + { + "epoch": 1.45, + "learning_rate": 1.0437412373436002e-06, + "loss": 0.7605, + "step": 6991 + }, + { + "epoch": 1.45, + "learning_rate": 1.0429957929709293e-06, + "loss": 0.8536, + "step": 6992 + }, + { + "epoch": 1.45, + "learning_rate": 1.0422505588782198e-06, + "loss": 0.9364, + "step": 6993 + }, + { + "epoch": 1.45, + "learning_rate": 1.0415055351455482e-06, + "loss": 1.0724, + "step": 6994 + }, + { + "epoch": 1.45, + "learning_rate": 1.0407607218529668e-06, + "loss": 1.0327, + "step": 6995 + }, + { + "epoch": 1.46, + "learning_rate": 1.040016119080507e-06, + "loss": 0.8101, + "step": 6996 + }, + { + "epoch": 1.46, + "learning_rate": 1.0392717269081748e-06, + "loss": 1.0086, + "step": 6997 + }, + { + "epoch": 1.46, + "learning_rate": 1.038527545415955e-06, + "loss": 0.8576, + "step": 6998 + }, + { + "epoch": 1.46, + "learning_rate": 1.0377835746838111e-06, + "loss": 1.0748, + "step": 6999 + }, + { + "epoch": 1.46, + "learning_rate": 1.0370398147916827e-06, + "loss": 1.1731, + "step": 7000 + }, + { + "epoch": 1.46, + "eval_loss": NaN, + "eval_runtime": 15.0705, + "eval_samples_per_second": 351.348, + "eval_steps_per_second": 43.927, + "step": 7000 + }, + { + "epoch": 1.46, + "learning_rate": 1.0362962658194854e-06, + "loss": 0.8256, + "step": 7001 + }, + { + "epoch": 1.46, + "learning_rate": 1.035552927847114e-06, + "loss": 0.8147, + "step": 7002 + }, + { + "epoch": 1.46, + "learning_rate": 1.0348098009544403e-06, + "loss": 0.8934, + "step": 7003 + }, + { + "epoch": 1.46, + "learning_rate": 1.0340668852213143e-06, + "loss": 1.0364, + "step": 7004 + }, + { + "epoch": 1.46, + "learning_rate": 1.0333241807275604e-06, + "loss": 0.802, + "step": 7005 + }, + { + "epoch": 1.46, + "learning_rate": 1.0325816875529827e-06, + "loss": 0.7792, + "step": 7006 + }, + { + "epoch": 1.46, + "learning_rate": 1.031839405777362e-06, + "loss": 0.9916, + "step": 7007 + }, + { + "epoch": 1.46, + "learning_rate": 1.0310973354804584e-06, + "loss": 0.9737, + "step": 7008 + }, + { + "epoch": 1.46, + "learning_rate": 1.030355476742004e-06, + "loss": 0.9474, + "step": 7009 + }, + { + "epoch": 1.46, + "learning_rate": 1.0296138296417134e-06, + "loss": 0.843, + "step": 7010 + }, + { + "epoch": 1.46, + "learning_rate": 1.0288723942592762e-06, + "loss": 0.7808, + "step": 7011 + }, + { + "epoch": 1.46, + "learning_rate": 1.0281311706743608e-06, + "loss": 0.8981, + "step": 7012 + }, + { + "epoch": 1.46, + "learning_rate": 1.027390158966609e-06, + "loss": 0.8691, + "step": 7013 + }, + { + "epoch": 1.46, + "learning_rate": 1.026649359215644e-06, + "loss": 0.8532, + "step": 7014 + }, + { + "epoch": 1.46, + "learning_rate": 1.0259087715010644e-06, + "loss": 0.9837, + "step": 7015 + }, + { + "epoch": 1.46, + "learning_rate": 1.0251683959024473e-06, + "loss": 0.8622, + "step": 7016 + }, + { + "epoch": 1.46, + "learning_rate": 1.0244282324993446e-06, + "loss": 0.8015, + "step": 7017 + }, + { + "epoch": 1.46, + "learning_rate": 1.0236882813712871e-06, + "loss": 1.0249, + "step": 7018 + }, + { + "epoch": 1.46, + "learning_rate": 1.0229485425977828e-06, + "loss": 0.7243, + "step": 7019 + }, + { + "epoch": 1.46, + "learning_rate": 1.0222090162583173e-06, + "loss": 0.7028, + "step": 7020 + }, + { + "epoch": 1.46, + "learning_rate": 1.02146970243235e-06, + "loss": 0.6859, + "step": 7021 + }, + { + "epoch": 1.46, + "learning_rate": 1.020730601199324e-06, + "loss": 0.9632, + "step": 7022 + }, + { + "epoch": 1.46, + "learning_rate": 1.0199917126386527e-06, + "loss": 0.9226, + "step": 7023 + }, + { + "epoch": 1.46, + "learning_rate": 1.0192530368297317e-06, + "loss": 0.838, + "step": 7024 + }, + { + "epoch": 1.46, + "learning_rate": 1.0185145738519283e-06, + "loss": 0.8639, + "step": 7025 + }, + { + "epoch": 1.46, + "learning_rate": 1.017776323784595e-06, + "loss": 0.9798, + "step": 7026 + }, + { + "epoch": 1.46, + "learning_rate": 1.017038286707053e-06, + "loss": 0.877, + "step": 7027 + }, + { + "epoch": 1.46, + "learning_rate": 1.0163004626986064e-06, + "loss": 0.9733, + "step": 7028 + }, + { + "epoch": 1.46, + "learning_rate": 1.0155628518385318e-06, + "loss": 1.1905, + "step": 7029 + }, + { + "epoch": 1.46, + "learning_rate": 1.0148254542060886e-06, + "loss": 0.7651, + "step": 7030 + }, + { + "epoch": 1.46, + "learning_rate": 1.0140882698805086e-06, + "loss": 0.7491, + "step": 7031 + }, + { + "epoch": 1.46, + "learning_rate": 1.0133512989410028e-06, + "loss": 0.8603, + "step": 7032 + }, + { + "epoch": 1.46, + "learning_rate": 1.0126145414667562e-06, + "loss": 0.9814, + "step": 7033 + }, + { + "epoch": 1.46, + "learning_rate": 1.011877997536937e-06, + "loss": 0.799, + "step": 7034 + }, + { + "epoch": 1.46, + "learning_rate": 1.0111416672306843e-06, + "loss": 0.7596, + "step": 7035 + }, + { + "epoch": 1.46, + "learning_rate": 1.0104055506271185e-06, + "loss": 0.9788, + "step": 7036 + }, + { + "epoch": 1.46, + "learning_rate": 1.0096696478053328e-06, + "loss": 1.0703, + "step": 7037 + }, + { + "epoch": 1.46, + "learning_rate": 1.008933958844403e-06, + "loss": 0.8649, + "step": 7038 + }, + { + "epoch": 1.46, + "learning_rate": 1.0081984838233764e-06, + "loss": 0.9511, + "step": 7039 + }, + { + "epoch": 1.46, + "learning_rate": 1.0074632228212813e-06, + "loss": 1.0418, + "step": 7040 + }, + { + "epoch": 1.46, + "learning_rate": 1.0067281759171188e-06, + "loss": 1.0438, + "step": 7041 + }, + { + "epoch": 1.46, + "learning_rate": 1.0059933431898737e-06, + "loss": 0.8478, + "step": 7042 + }, + { + "epoch": 1.46, + "learning_rate": 1.0052587247185005e-06, + "loss": 0.792, + "step": 7043 + }, + { + "epoch": 1.47, + "learning_rate": 1.004524320581936e-06, + "loss": 0.9582, + "step": 7044 + }, + { + "epoch": 1.47, + "learning_rate": 1.0037901308590888e-06, + "loss": 0.8292, + "step": 7045 + }, + { + "epoch": 1.47, + "learning_rate": 1.0030561556288517e-06, + "loss": 0.7458, + "step": 7046 + }, + { + "epoch": 1.47, + "learning_rate": 1.0023223949700872e-06, + "loss": 1.0316, + "step": 7047 + }, + { + "epoch": 1.47, + "learning_rate": 1.0015888489616395e-06, + "loss": 1.008, + "step": 7048 + }, + { + "epoch": 1.47, + "learning_rate": 1.0008555176823258e-06, + "loss": 0.7981, + "step": 7049 + }, + { + "epoch": 1.47, + "learning_rate": 1.000122401210946e-06, + "loss": 1.0081, + "step": 7050 + }, + { + "epoch": 1.47, + "learning_rate": 9.993894996262704e-07, + "loss": 0.9156, + "step": 7051 + }, + { + "epoch": 1.47, + "learning_rate": 9.986568130070511e-07, + "loss": 0.8844, + "step": 7052 + }, + { + "epoch": 1.47, + "learning_rate": 9.979243414320127e-07, + "loss": 0.8409, + "step": 7053 + }, + { + "epoch": 1.47, + "learning_rate": 9.971920849798628e-07, + "loss": 0.7789, + "step": 7054 + }, + { + "epoch": 1.47, + "learning_rate": 9.964600437292798e-07, + "loss": 0.7946, + "step": 7055 + }, + { + "epoch": 1.47, + "learning_rate": 9.957282177589227e-07, + "loss": 0.8788, + "step": 7056 + }, + { + "epoch": 1.47, + "learning_rate": 9.949966071474238e-07, + "loss": 0.9163, + "step": 7057 + }, + { + "epoch": 1.47, + "learning_rate": 9.942652119733985e-07, + "loss": 0.8832, + "step": 7058 + }, + { + "epoch": 1.47, + "learning_rate": 9.935340323154316e-07, + "loss": 0.7219, + "step": 7059 + }, + { + "epoch": 1.47, + "learning_rate": 9.92803068252091e-07, + "loss": 0.7828, + "step": 7060 + }, + { + "epoch": 1.47, + "learning_rate": 9.920723198619158e-07, + "loss": 1.0312, + "step": 7061 + }, + { + "epoch": 1.47, + "learning_rate": 9.91341787223428e-07, + "loss": 1.0333, + "step": 7062 + }, + { + "epoch": 1.47, + "learning_rate": 9.906114704151213e-07, + "loss": 0.75, + "step": 7063 + }, + { + "epoch": 1.47, + "learning_rate": 9.898813695154692e-07, + "loss": 0.9922, + "step": 7064 + }, + { + "epoch": 1.47, + "learning_rate": 9.891514846029189e-07, + "loss": 0.7877, + "step": 7065 + }, + { + "epoch": 1.47, + "learning_rate": 9.884218157558998e-07, + "loss": 0.8212, + "step": 7066 + }, + { + "epoch": 1.47, + "learning_rate": 9.876923630528125e-07, + "loss": 1.0042, + "step": 7067 + }, + { + "epoch": 1.47, + "learning_rate": 9.869631265720379e-07, + "loss": 0.8574, + "step": 7068 + }, + { + "epoch": 1.47, + "learning_rate": 9.862341063919296e-07, + "loss": 0.7124, + "step": 7069 + }, + { + "epoch": 1.47, + "learning_rate": 9.85505302590825e-07, + "loss": 0.8332, + "step": 7070 + }, + { + "epoch": 1.47, + "learning_rate": 9.84776715247031e-07, + "loss": 1.0196, + "step": 7071 + }, + { + "epoch": 1.47, + "learning_rate": 9.84048344438836e-07, + "loss": 0.8777, + "step": 7072 + }, + { + "epoch": 1.47, + "learning_rate": 9.833201902445007e-07, + "loss": 0.9115, + "step": 7073 + }, + { + "epoch": 1.47, + "learning_rate": 9.825922527422687e-07, + "loss": 0.9632, + "step": 7074 + }, + { + "epoch": 1.47, + "learning_rate": 9.818645320103545e-07, + "loss": 0.8554, + "step": 7075 + }, + { + "epoch": 1.47, + "learning_rate": 9.811370281269532e-07, + "loss": 0.8906, + "step": 7076 + }, + { + "epoch": 1.47, + "learning_rate": 9.804097411702325e-07, + "loss": 0.9603, + "step": 7077 + }, + { + "epoch": 1.47, + "learning_rate": 9.796826712183425e-07, + "loss": 0.8953, + "step": 7078 + }, + { + "epoch": 1.47, + "learning_rate": 9.789558183494044e-07, + "loss": 0.7952, + "step": 7079 + }, + { + "epoch": 1.47, + "learning_rate": 9.782291826415205e-07, + "loss": 0.8352, + "step": 7080 + }, + { + "epoch": 1.47, + "learning_rate": 9.775027641727647e-07, + "loss": 0.7592, + "step": 7081 + }, + { + "epoch": 1.47, + "learning_rate": 9.767765630211946e-07, + "loss": 0.9707, + "step": 7082 + }, + { + "epoch": 1.47, + "learning_rate": 9.760505792648372e-07, + "loss": 0.8329, + "step": 7083 + }, + { + "epoch": 1.47, + "learning_rate": 9.753248129817019e-07, + "loss": 0.9825, + "step": 7084 + }, + { + "epoch": 1.47, + "learning_rate": 9.745992642497689e-07, + "loss": 0.9627, + "step": 7085 + }, + { + "epoch": 1.47, + "learning_rate": 9.738739331470022e-07, + "loss": 0.9038, + "step": 7086 + }, + { + "epoch": 1.47, + "learning_rate": 9.731488197513362e-07, + "loss": 0.8155, + "step": 7087 + }, + { + "epoch": 1.47, + "learning_rate": 9.724239241406847e-07, + "loss": 0.8229, + "step": 7088 + }, + { + "epoch": 1.47, + "learning_rate": 9.71699246392938e-07, + "loss": 0.8636, + "step": 7089 + }, + { + "epoch": 1.47, + "learning_rate": 9.709747865859631e-07, + "loss": 0.7606, + "step": 7090 + }, + { + "epoch": 1.47, + "learning_rate": 9.70250544797602e-07, + "loss": 0.8526, + "step": 7091 + }, + { + "epoch": 1.48, + "learning_rate": 9.695265211056747e-07, + "loss": 0.9806, + "step": 7092 + }, + { + "epoch": 1.48, + "learning_rate": 9.68802715587978e-07, + "loss": 1.112, + "step": 7093 + }, + { + "epoch": 1.48, + "learning_rate": 9.680791283222859e-07, + "loss": 0.8522, + "step": 7094 + }, + { + "epoch": 1.48, + "learning_rate": 9.673557593863448e-07, + "loss": 0.7918, + "step": 7095 + }, + { + "epoch": 1.48, + "learning_rate": 9.666326088578827e-07, + "loss": 0.8545, + "step": 7096 + }, + { + "epoch": 1.48, + "learning_rate": 9.659096768146018e-07, + "loss": 0.9736, + "step": 7097 + }, + { + "epoch": 1.48, + "learning_rate": 9.651869633341817e-07, + "loss": 0.8617, + "step": 7098 + }, + { + "epoch": 1.48, + "learning_rate": 9.644644684942764e-07, + "loss": 0.6984, + "step": 7099 + }, + { + "epoch": 1.48, + "learning_rate": 9.637421923725187e-07, + "loss": 0.9869, + "step": 7100 + }, + { + "epoch": 1.48, + "learning_rate": 9.63020135046517e-07, + "loss": 0.7819, + "step": 7101 + }, + { + "epoch": 1.48, + "learning_rate": 9.622982965938574e-07, + "loss": 1.0986, + "step": 7102 + }, + { + "epoch": 1.48, + "learning_rate": 9.615766770920994e-07, + "loss": 0.9586, + "step": 7103 + }, + { + "epoch": 1.48, + "learning_rate": 9.60855276618782e-07, + "loss": 0.8248, + "step": 7104 + }, + { + "epoch": 1.48, + "learning_rate": 9.601340952514197e-07, + "loss": 0.9166, + "step": 7105 + }, + { + "epoch": 1.48, + "learning_rate": 9.594131330675045e-07, + "loss": 0.8351, + "step": 7106 + }, + { + "epoch": 1.48, + "learning_rate": 9.586923901445012e-07, + "loss": 1.0555, + "step": 7107 + }, + { + "epoch": 1.48, + "learning_rate": 9.579718665598553e-07, + "loss": 0.9224, + "step": 7108 + }, + { + "epoch": 1.48, + "learning_rate": 9.572515623909861e-07, + "loss": 1.0633, + "step": 7109 + }, + { + "epoch": 1.48, + "learning_rate": 9.565314777152922e-07, + "loss": 0.845, + "step": 7110 + }, + { + "epoch": 1.48, + "learning_rate": 9.558116126101439e-07, + "loss": 0.8362, + "step": 7111 + }, + { + "epoch": 1.48, + "learning_rate": 9.550919671528924e-07, + "loss": 0.7566, + "step": 7112 + }, + { + "epoch": 1.48, + "learning_rate": 9.543725414208624e-07, + "loss": 0.7619, + "step": 7113 + }, + { + "epoch": 1.48, + "learning_rate": 9.536533354913584e-07, + "loss": 0.7315, + "step": 7114 + }, + { + "epoch": 1.48, + "learning_rate": 9.529343494416563e-07, + "loss": 0.8001, + "step": 7115 + }, + { + "epoch": 1.48, + "learning_rate": 9.522155833490123e-07, + "loss": 0.9307, + "step": 7116 + }, + { + "epoch": 1.48, + "learning_rate": 9.514970372906576e-07, + "loss": 0.9465, + "step": 7117 + }, + { + "epoch": 1.48, + "learning_rate": 9.507787113438014e-07, + "loss": 0.7748, + "step": 7118 + }, + { + "epoch": 1.48, + "learning_rate": 9.500606055856254e-07, + "loss": 1.1619, + "step": 7119 + }, + { + "epoch": 1.48, + "learning_rate": 9.493427200932906e-07, + "loss": 0.8031, + "step": 7120 + }, + { + "epoch": 1.48, + "learning_rate": 9.486250549439347e-07, + "loss": 0.8828, + "step": 7121 + }, + { + "epoch": 1.48, + "learning_rate": 9.47907610214671e-07, + "loss": 0.6354, + "step": 7122 + }, + { + "epoch": 1.48, + "learning_rate": 9.471903859825871e-07, + "loss": 0.8046, + "step": 7123 + }, + { + "epoch": 1.48, + "learning_rate": 9.464733823247497e-07, + "loss": 0.8536, + "step": 7124 + }, + { + "epoch": 1.48, + "learning_rate": 9.457565993182011e-07, + "loss": 0.6967, + "step": 7125 + }, + { + "epoch": 1.48, + "learning_rate": 9.450400370399602e-07, + "loss": 0.9006, + "step": 7126 + }, + { + "epoch": 1.48, + "learning_rate": 9.443236955670198e-07, + "loss": 0.6944, + "step": 7127 + }, + { + "epoch": 1.48, + "learning_rate": 9.436075749763515e-07, + "loss": 1.1203, + "step": 7128 + }, + { + "epoch": 1.48, + "learning_rate": 9.428916753449025e-07, + "loss": 0.9868, + "step": 7129 + }, + { + "epoch": 1.48, + "learning_rate": 9.421759967495972e-07, + "loss": 0.9606, + "step": 7130 + }, + { + "epoch": 1.48, + "learning_rate": 9.414605392673331e-07, + "loss": 0.7938, + "step": 7131 + }, + { + "epoch": 1.48, + "learning_rate": 9.407453029749874e-07, + "loss": 0.8637, + "step": 7132 + }, + { + "epoch": 1.48, + "learning_rate": 9.400302879494119e-07, + "loss": 0.8242, + "step": 7133 + }, + { + "epoch": 1.48, + "learning_rate": 9.393154942674359e-07, + "loss": 0.8339, + "step": 7134 + }, + { + "epoch": 1.48, + "learning_rate": 9.386009220058618e-07, + "loss": 0.8894, + "step": 7135 + }, + { + "epoch": 1.48, + "learning_rate": 9.378865712414718e-07, + "loss": 0.9755, + "step": 7136 + }, + { + "epoch": 1.48, + "learning_rate": 9.371724420510226e-07, + "loss": 0.7256, + "step": 7137 + }, + { + "epoch": 1.48, + "learning_rate": 9.364585345112482e-07, + "loss": 0.9301, + "step": 7138 + }, + { + "epoch": 1.48, + "learning_rate": 9.357448486988559e-07, + "loss": 0.9208, + "step": 7139 + }, + { + "epoch": 1.49, + "learning_rate": 9.350313846905324e-07, + "loss": 0.8832, + "step": 7140 + }, + { + "epoch": 1.49, + "learning_rate": 9.343181425629393e-07, + "loss": 1.1291, + "step": 7141 + }, + { + "epoch": 1.49, + "learning_rate": 9.336051223927155e-07, + "loss": 0.9667, + "step": 7142 + }, + { + "epoch": 1.49, + "learning_rate": 9.328923242564725e-07, + "loss": 1.1314, + "step": 7143 + }, + { + "epoch": 1.49, + "learning_rate": 9.32179748230802e-07, + "loss": 0.7579, + "step": 7144 + }, + { + "epoch": 1.49, + "learning_rate": 9.314673943922699e-07, + "loss": 0.8001, + "step": 7145 + }, + { + "epoch": 1.49, + "learning_rate": 9.307552628174196e-07, + "loss": 0.8538, + "step": 7146 + }, + { + "epoch": 1.49, + "learning_rate": 9.300433535827679e-07, + "loss": 1.0277, + "step": 7147 + }, + { + "epoch": 1.49, + "learning_rate": 9.2933166676481e-07, + "loss": 0.7127, + "step": 7148 + }, + { + "epoch": 1.49, + "learning_rate": 9.286202024400169e-07, + "loss": 0.8119, + "step": 7149 + }, + { + "epoch": 1.49, + "learning_rate": 9.279089606848359e-07, + "loss": 1.1709, + "step": 7150 + }, + { + "epoch": 1.49, + "learning_rate": 9.271979415756885e-07, + "loss": 0.8502, + "step": 7151 + }, + { + "epoch": 1.49, + "learning_rate": 9.264871451889742e-07, + "loss": 1.0141, + "step": 7152 + }, + { + "epoch": 1.49, + "learning_rate": 9.257765716010685e-07, + "loss": 0.9561, + "step": 7153 + }, + { + "epoch": 1.49, + "learning_rate": 9.250662208883226e-07, + "loss": 0.8967, + "step": 7154 + }, + { + "epoch": 1.49, + "learning_rate": 9.243560931270617e-07, + "loss": 0.8681, + "step": 7155 + }, + { + "epoch": 1.49, + "learning_rate": 9.236461883935924e-07, + "loss": 0.7292, + "step": 7156 + }, + { + "epoch": 1.49, + "learning_rate": 9.22936506764191e-07, + "loss": 0.9458, + "step": 7157 + }, + { + "epoch": 1.49, + "learning_rate": 9.222270483151144e-07, + "loss": 1.0712, + "step": 7158 + }, + { + "epoch": 1.49, + "learning_rate": 9.215178131225925e-07, + "loss": 0.8696, + "step": 7159 + }, + { + "epoch": 1.49, + "learning_rate": 9.20808801262833e-07, + "loss": 1.1702, + "step": 7160 + }, + { + "epoch": 1.49, + "learning_rate": 9.201000128120197e-07, + "loss": 0.9397, + "step": 7161 + }, + { + "epoch": 1.49, + "learning_rate": 9.193914478463128e-07, + "loss": 1.0369, + "step": 7162 + }, + { + "epoch": 1.49, + "learning_rate": 9.18683106441845e-07, + "loss": 0.8191, + "step": 7163 + }, + { + "epoch": 1.49, + "learning_rate": 9.179749886747289e-07, + "loss": 0.906, + "step": 7164 + }, + { + "epoch": 1.49, + "learning_rate": 9.172670946210519e-07, + "loss": 0.7482, + "step": 7165 + }, + { + "epoch": 1.49, + "learning_rate": 9.165594243568779e-07, + "loss": 0.8159, + "step": 7166 + }, + { + "epoch": 1.49, + "learning_rate": 9.158519779582439e-07, + "loss": 0.8754, + "step": 7167 + }, + { + "epoch": 1.49, + "learning_rate": 9.151447555011663e-07, + "loss": 0.8885, + "step": 7168 + }, + { + "epoch": 1.49, + "learning_rate": 9.144377570616358e-07, + "loss": 0.8048, + "step": 7169 + }, + { + "epoch": 1.49, + "learning_rate": 9.137309827156207e-07, + "loss": 1.0388, + "step": 7170 + }, + { + "epoch": 1.49, + "learning_rate": 9.130244325390616e-07, + "loss": 0.8239, + "step": 7171 + }, + { + "epoch": 1.49, + "learning_rate": 9.123181066078781e-07, + "loss": 0.7858, + "step": 7172 + }, + { + "epoch": 1.49, + "learning_rate": 9.116120049979652e-07, + "loss": 0.9194, + "step": 7173 + }, + { + "epoch": 1.49, + "learning_rate": 9.109061277851946e-07, + "loss": 0.9224, + "step": 7174 + }, + { + "epoch": 1.49, + "learning_rate": 9.102004750454101e-07, + "loss": 0.8489, + "step": 7175 + }, + { + "epoch": 1.49, + "learning_rate": 9.094950468544358e-07, + "loss": 1.1737, + "step": 7176 + }, + { + "epoch": 1.49, + "learning_rate": 9.08789843288069e-07, + "loss": 1.0209, + "step": 7177 + }, + { + "epoch": 1.49, + "learning_rate": 9.080848644220856e-07, + "loss": 0.88, + "step": 7178 + }, + { + "epoch": 1.49, + "learning_rate": 9.073801103322336e-07, + "loss": 0.9117, + "step": 7179 + }, + { + "epoch": 1.49, + "learning_rate": 9.06675581094239e-07, + "loss": 0.7673, + "step": 7180 + }, + { + "epoch": 1.49, + "learning_rate": 9.05971276783804e-07, + "loss": 0.9266, + "step": 7181 + }, + { + "epoch": 1.49, + "learning_rate": 9.052671974766066e-07, + "loss": 0.9331, + "step": 7182 + }, + { + "epoch": 1.49, + "learning_rate": 9.045633432482986e-07, + "loss": 0.7808, + "step": 7183 + }, + { + "epoch": 1.49, + "learning_rate": 9.038597141745099e-07, + "loss": 1.0405, + "step": 7184 + }, + { + "epoch": 1.49, + "learning_rate": 9.031563103308454e-07, + "loss": 0.8641, + "step": 7185 + }, + { + "epoch": 1.49, + "learning_rate": 9.024531317928862e-07, + "loss": 0.8183, + "step": 7186 + }, + { + "epoch": 1.49, + "learning_rate": 9.017501786361878e-07, + "loss": 0.8151, + "step": 7187 + }, + { + "epoch": 1.5, + "learning_rate": 9.010474509362829e-07, + "loss": 1.1236, + "step": 7188 + }, + { + "epoch": 1.5, + "learning_rate": 9.003449487686795e-07, + "loss": 0.8543, + "step": 7189 + }, + { + "epoch": 1.5, + "learning_rate": 8.996426722088627e-07, + "loss": 0.9915, + "step": 7190 + }, + { + "epoch": 1.5, + "learning_rate": 8.989406213322898e-07, + "loss": 1.0492, + "step": 7191 + }, + { + "epoch": 1.5, + "learning_rate": 8.982387962143971e-07, + "loss": 0.9803, + "step": 7192 + }, + { + "epoch": 1.5, + "learning_rate": 8.975371969305962e-07, + "loss": 0.9396, + "step": 7193 + }, + { + "epoch": 1.5, + "learning_rate": 8.968358235562739e-07, + "loss": 0.87, + "step": 7194 + }, + { + "epoch": 1.5, + "learning_rate": 8.961346761667916e-07, + "loss": 0.7566, + "step": 7195 + }, + { + "epoch": 1.5, + "learning_rate": 8.954337548374881e-07, + "loss": 0.873, + "step": 7196 + }, + { + "epoch": 1.5, + "learning_rate": 8.947330596436773e-07, + "loss": 0.9511, + "step": 7197 + }, + { + "epoch": 1.5, + "learning_rate": 8.940325906606503e-07, + "loss": 0.8341, + "step": 7198 + }, + { + "epoch": 1.5, + "learning_rate": 8.933323479636702e-07, + "loss": 0.8252, + "step": 7199 + }, + { + "epoch": 1.5, + "learning_rate": 8.926323316279789e-07, + "loss": 0.8038, + "step": 7200 + }, + { + "epoch": 1.5, + "learning_rate": 8.91932541728793e-07, + "loss": 0.9062, + "step": 7201 + }, + { + "epoch": 1.5, + "learning_rate": 8.912329783413062e-07, + "loss": 1.1006, + "step": 7202 + }, + { + "epoch": 1.5, + "learning_rate": 8.905336415406844e-07, + "loss": 1.0372, + "step": 7203 + }, + { + "epoch": 1.5, + "learning_rate": 8.898345314020724e-07, + "loss": 0.994, + "step": 7204 + }, + { + "epoch": 1.5, + "learning_rate": 8.891356480005891e-07, + "loss": 0.8434, + "step": 7205 + }, + { + "epoch": 1.5, + "learning_rate": 8.884369914113311e-07, + "loss": 0.8015, + "step": 7206 + }, + { + "epoch": 1.5, + "learning_rate": 8.877385617093665e-07, + "loss": 0.9276, + "step": 7207 + }, + { + "epoch": 1.5, + "learning_rate": 8.870403589697431e-07, + "loss": 1.0067, + "step": 7208 + }, + { + "epoch": 1.5, + "learning_rate": 8.86342383267482e-07, + "loss": 0.7242, + "step": 7209 + }, + { + "epoch": 1.5, + "learning_rate": 8.856446346775821e-07, + "loss": 0.9407, + "step": 7210 + }, + { + "epoch": 1.5, + "learning_rate": 8.849471132750141e-07, + "loss": 1.167, + "step": 7211 + }, + { + "epoch": 1.5, + "learning_rate": 8.842498191347281e-07, + "loss": 1.0467, + "step": 7212 + }, + { + "epoch": 1.5, + "learning_rate": 8.835527523316483e-07, + "loss": 0.8378, + "step": 7213 + }, + { + "epoch": 1.5, + "learning_rate": 8.828559129406748e-07, + "loss": 1.106, + "step": 7214 + }, + { + "epoch": 1.5, + "learning_rate": 8.821593010366812e-07, + "loss": 0.8599, + "step": 7215 + }, + { + "epoch": 1.5, + "learning_rate": 8.814629166945202e-07, + "loss": 0.8196, + "step": 7216 + }, + { + "epoch": 1.5, + "learning_rate": 8.807667599890173e-07, + "loss": 0.8725, + "step": 7217 + }, + { + "epoch": 1.5, + "learning_rate": 8.800708309949756e-07, + "loss": 0.9872, + "step": 7218 + }, + { + "epoch": 1.5, + "learning_rate": 8.793751297871713e-07, + "loss": 0.8901, + "step": 7219 + }, + { + "epoch": 1.5, + "learning_rate": 8.786796564403577e-07, + "loss": 0.945, + "step": 7220 + }, + { + "epoch": 1.5, + "learning_rate": 8.779844110292636e-07, + "loss": 0.9393, + "step": 7221 + }, + { + "epoch": 1.5, + "learning_rate": 8.772893936285932e-07, + "loss": 0.7743, + "step": 7222 + }, + { + "epoch": 1.5, + "learning_rate": 8.765946043130269e-07, + "loss": 0.729, + "step": 7223 + }, + { + "epoch": 1.5, + "learning_rate": 8.75900043157218e-07, + "loss": 0.8709, + "step": 7224 + }, + { + "epoch": 1.5, + "learning_rate": 8.752057102357977e-07, + "loss": 0.9245, + "step": 7225 + }, + { + "epoch": 1.5, + "learning_rate": 8.745116056233726e-07, + "loss": 0.7085, + "step": 7226 + }, + { + "epoch": 1.5, + "learning_rate": 8.738177293945245e-07, + "loss": 0.8551, + "step": 7227 + }, + { + "epoch": 1.5, + "learning_rate": 8.731240816238085e-07, + "loss": 1.155, + "step": 7228 + }, + { + "epoch": 1.5, + "learning_rate": 8.724306623857583e-07, + "loss": 0.8069, + "step": 7229 + }, + { + "epoch": 1.5, + "learning_rate": 8.717374717548819e-07, + "loss": 1.0062, + "step": 7230 + }, + { + "epoch": 1.5, + "learning_rate": 8.710445098056629e-07, + "loss": 0.9347, + "step": 7231 + }, + { + "epoch": 1.5, + "learning_rate": 8.703517766125585e-07, + "loss": 0.8643, + "step": 7232 + }, + { + "epoch": 1.5, + "learning_rate": 8.69659272250004e-07, + "loss": 0.8018, + "step": 7233 + }, + { + "epoch": 1.5, + "learning_rate": 8.689669967924084e-07, + "loss": 0.9335, + "step": 7234 + }, + { + "epoch": 1.5, + "learning_rate": 8.68274950314158e-07, + "loss": 1.0423, + "step": 7235 + }, + { + "epoch": 1.5, + "learning_rate": 8.67583132889611e-07, + "loss": 1.0279, + "step": 7236 + }, + { + "epoch": 1.51, + "learning_rate": 8.668915445931042e-07, + "loss": 0.8082, + "step": 7237 + }, + { + "epoch": 1.51, + "learning_rate": 8.662001854989488e-07, + "loss": 0.8277, + "step": 7238 + }, + { + "epoch": 1.51, + "learning_rate": 8.655090556814323e-07, + "loss": 0.8882, + "step": 7239 + }, + { + "epoch": 1.51, + "learning_rate": 8.648181552148142e-07, + "loss": 1.0265, + "step": 7240 + }, + { + "epoch": 1.51, + "learning_rate": 8.641274841733333e-07, + "loss": 0.961, + "step": 7241 + }, + { + "epoch": 1.51, + "learning_rate": 8.634370426312016e-07, + "loss": 0.931, + "step": 7242 + }, + { + "epoch": 1.51, + "learning_rate": 8.627468306626081e-07, + "loss": 0.7398, + "step": 7243 + }, + { + "epoch": 1.51, + "learning_rate": 8.620568483417144e-07, + "loss": 1.0996, + "step": 7244 + }, + { + "epoch": 1.51, + "learning_rate": 8.613670957426596e-07, + "loss": 0.8032, + "step": 7245 + }, + { + "epoch": 1.51, + "learning_rate": 8.606775729395578e-07, + "loss": 1.2966, + "step": 7246 + }, + { + "epoch": 1.51, + "learning_rate": 8.599882800064988e-07, + "loss": 0.9252, + "step": 7247 + }, + { + "epoch": 1.51, + "learning_rate": 8.592992170175457e-07, + "loss": 0.8785, + "step": 7248 + }, + { + "epoch": 1.51, + "learning_rate": 8.586103840467392e-07, + "loss": 0.759, + "step": 7249 + }, + { + "epoch": 1.51, + "learning_rate": 8.579217811680939e-07, + "loss": 0.7976, + "step": 7250 + }, + { + "epoch": 1.51, + "learning_rate": 8.572334084556015e-07, + "loss": 0.8906, + "step": 7251 + }, + { + "epoch": 1.51, + "learning_rate": 8.565452659832253e-07, + "loss": 0.9473, + "step": 7252 + }, + { + "epoch": 1.51, + "learning_rate": 8.558573538249074e-07, + "loss": 0.9632, + "step": 7253 + }, + { + "epoch": 1.51, + "learning_rate": 8.551696720545637e-07, + "loss": 0.9636, + "step": 7254 + }, + { + "epoch": 1.51, + "learning_rate": 8.544822207460868e-07, + "loss": 0.8138, + "step": 7255 + }, + { + "epoch": 1.51, + "learning_rate": 8.537949999733412e-07, + "loss": 0.8632, + "step": 7256 + }, + { + "epoch": 1.51, + "learning_rate": 8.531080098101698e-07, + "loss": 0.8853, + "step": 7257 + }, + { + "epoch": 1.51, + "learning_rate": 8.524212503303894e-07, + "loss": 0.7394, + "step": 7258 + }, + { + "epoch": 1.51, + "learning_rate": 8.517347216077931e-07, + "loss": 0.9825, + "step": 7259 + }, + { + "epoch": 1.51, + "learning_rate": 8.510484237161471e-07, + "loss": 0.8937, + "step": 7260 + }, + { + "epoch": 1.51, + "learning_rate": 8.503623567291945e-07, + "loss": 0.9148, + "step": 7261 + }, + { + "epoch": 1.51, + "learning_rate": 8.496765207206532e-07, + "loss": 1.1823, + "step": 7262 + }, + { + "epoch": 1.51, + "learning_rate": 8.489909157642175e-07, + "loss": 1.0261, + "step": 7263 + }, + { + "epoch": 1.51, + "learning_rate": 8.483055419335532e-07, + "loss": 0.8799, + "step": 7264 + }, + { + "epoch": 1.51, + "learning_rate": 8.476203993023049e-07, + "loss": 0.777, + "step": 7265 + }, + { + "epoch": 1.51, + "learning_rate": 8.469354879440914e-07, + "loss": 1.0947, + "step": 7266 + }, + { + "epoch": 1.51, + "learning_rate": 8.462508079325066e-07, + "loss": 0.9287, + "step": 7267 + }, + { + "epoch": 1.51, + "learning_rate": 8.455663593411182e-07, + "loss": 0.8867, + "step": 7268 + }, + { + "epoch": 1.51, + "learning_rate": 8.448821422434707e-07, + "loss": 0.841, + "step": 7269 + }, + { + "epoch": 1.51, + "learning_rate": 8.441981567130832e-07, + "loss": 1.0281, + "step": 7270 + }, + { + "epoch": 1.51, + "learning_rate": 8.435144028234506e-07, + "loss": 0.7633, + "step": 7271 + }, + { + "epoch": 1.51, + "learning_rate": 8.428308806480412e-07, + "loss": 0.7702, + "step": 7272 + }, + { + "epoch": 1.51, + "learning_rate": 8.42147590260299e-07, + "loss": 0.7581, + "step": 7273 + }, + { + "epoch": 1.51, + "learning_rate": 8.414645317336447e-07, + "loss": 0.7504, + "step": 7274 + }, + { + "epoch": 1.51, + "learning_rate": 8.407817051414732e-07, + "loss": 0.8066, + "step": 7275 + }, + { + "epoch": 1.51, + "learning_rate": 8.400991105571527e-07, + "loss": 0.937, + "step": 7276 + }, + { + "epoch": 1.51, + "learning_rate": 8.394167480540285e-07, + "loss": 0.9826, + "step": 7277 + }, + { + "epoch": 1.51, + "learning_rate": 8.387346177054205e-07, + "loss": 0.7538, + "step": 7278 + }, + { + "epoch": 1.51, + "learning_rate": 8.380527195846244e-07, + "loss": 0.8018, + "step": 7279 + }, + { + "epoch": 1.51, + "learning_rate": 8.373710537649088e-07, + "loss": 0.8776, + "step": 7280 + }, + { + "epoch": 1.51, + "learning_rate": 8.366896203195187e-07, + "loss": 0.8101, + "step": 7281 + }, + { + "epoch": 1.51, + "learning_rate": 8.360084193216747e-07, + "loss": 0.8265, + "step": 7282 + }, + { + "epoch": 1.51, + "learning_rate": 8.353274508445728e-07, + "loss": 0.9107, + "step": 7283 + }, + { + "epoch": 1.51, + "learning_rate": 8.346467149613806e-07, + "loss": 0.7321, + "step": 7284 + }, + { + "epoch": 1.52, + "learning_rate": 8.339662117452453e-07, + "loss": 0.9562, + "step": 7285 + }, + { + "epoch": 1.52, + "learning_rate": 8.332859412692842e-07, + "loss": 0.8093, + "step": 7286 + }, + { + "epoch": 1.52, + "learning_rate": 8.326059036065962e-07, + "loss": 1.0632, + "step": 7287 + }, + { + "epoch": 1.52, + "learning_rate": 8.31926098830248e-07, + "loss": 0.9873, + "step": 7288 + }, + { + "epoch": 1.52, + "learning_rate": 8.312465270132855e-07, + "loss": 0.7481, + "step": 7289 + }, + { + "epoch": 1.52, + "learning_rate": 8.305671882287294e-07, + "loss": 0.983, + "step": 7290 + }, + { + "epoch": 1.52, + "learning_rate": 8.298880825495747e-07, + "loss": 0.7828, + "step": 7291 + }, + { + "epoch": 1.52, + "learning_rate": 8.292092100487899e-07, + "loss": 0.8977, + "step": 7292 + }, + { + "epoch": 1.52, + "learning_rate": 8.285305707993207e-07, + "loss": 0.9454, + "step": 7293 + }, + { + "epoch": 1.52, + "learning_rate": 8.278521648740864e-07, + "loss": 0.7993, + "step": 7294 + }, + { + "epoch": 1.52, + "learning_rate": 8.271739923459831e-07, + "loss": 0.8439, + "step": 7295 + }, + { + "epoch": 1.52, + "learning_rate": 8.264960532878785e-07, + "loss": 0.9723, + "step": 7296 + }, + { + "epoch": 1.52, + "learning_rate": 8.258183477726179e-07, + "loss": 0.7659, + "step": 7297 + }, + { + "epoch": 1.52, + "learning_rate": 8.251408758730204e-07, + "loss": 0.8238, + "step": 7298 + }, + { + "epoch": 1.52, + "learning_rate": 8.244636376618816e-07, + "loss": 0.7856, + "step": 7299 + }, + { + "epoch": 1.52, + "learning_rate": 8.237866332119688e-07, + "loss": 0.8813, + "step": 7300 + }, + { + "epoch": 1.52, + "learning_rate": 8.231098625960268e-07, + "loss": 0.9926, + "step": 7301 + }, + { + "epoch": 1.52, + "learning_rate": 8.224333258867749e-07, + "loss": 1.1137, + "step": 7302 + }, + { + "epoch": 1.52, + "learning_rate": 8.217570231569078e-07, + "loss": 0.8462, + "step": 7303 + }, + { + "epoch": 1.52, + "learning_rate": 8.210809544790921e-07, + "loss": 1.1649, + "step": 7304 + }, + { + "epoch": 1.52, + "learning_rate": 8.204051199259721e-07, + "loss": 0.8149, + "step": 7305 + }, + { + "epoch": 1.52, + "learning_rate": 8.19729519570167e-07, + "loss": 0.9426, + "step": 7306 + }, + { + "epoch": 1.52, + "learning_rate": 8.1905415348427e-07, + "loss": 0.9498, + "step": 7307 + }, + { + "epoch": 1.52, + "learning_rate": 8.183790217408476e-07, + "loss": 0.7899, + "step": 7308 + }, + { + "epoch": 1.52, + "learning_rate": 8.177041244124435e-07, + "loss": 1.194, + "step": 7309 + }, + { + "epoch": 1.52, + "learning_rate": 8.170294615715759e-07, + "loss": 0.8114, + "step": 7310 + }, + { + "epoch": 1.52, + "learning_rate": 8.163550332907376e-07, + "loss": 1.0289, + "step": 7311 + }, + { + "epoch": 1.52, + "learning_rate": 8.156808396423943e-07, + "loss": 1.0659, + "step": 7312 + }, + { + "epoch": 1.52, + "learning_rate": 8.150068806989887e-07, + "loss": 0.7266, + "step": 7313 + }, + { + "epoch": 1.52, + "learning_rate": 8.143331565329382e-07, + "loss": 1.0634, + "step": 7314 + }, + { + "epoch": 1.52, + "learning_rate": 8.136596672166346e-07, + "loss": 0.9357, + "step": 7315 + }, + { + "epoch": 1.52, + "learning_rate": 8.12986412822443e-07, + "loss": 0.9142, + "step": 7316 + }, + { + "epoch": 1.52, + "learning_rate": 8.123133934227054e-07, + "loss": 0.7234, + "step": 7317 + }, + { + "epoch": 1.52, + "learning_rate": 8.116406090897374e-07, + "loss": 1.0619, + "step": 7318 + }, + { + "epoch": 1.52, + "learning_rate": 8.109680598958307e-07, + "loss": 0.9543, + "step": 7319 + }, + { + "epoch": 1.52, + "learning_rate": 8.102957459132491e-07, + "loss": 0.8676, + "step": 7320 + }, + { + "epoch": 1.52, + "learning_rate": 8.096236672142332e-07, + "loss": 0.9199, + "step": 7321 + }, + { + "epoch": 1.52, + "learning_rate": 8.089518238709978e-07, + "loss": 0.9184, + "step": 7322 + }, + { + "epoch": 1.52, + "learning_rate": 8.082802159557337e-07, + "loss": 0.9006, + "step": 7323 + }, + { + "epoch": 1.52, + "learning_rate": 8.076088435406028e-07, + "loss": 1.0599, + "step": 7324 + }, + { + "epoch": 1.52, + "learning_rate": 8.069377066977455e-07, + "loss": 1.0387, + "step": 7325 + }, + { + "epoch": 1.52, + "learning_rate": 8.06266805499275e-07, + "loss": 0.9081, + "step": 7326 + }, + { + "epoch": 1.52, + "learning_rate": 8.055961400172808e-07, + "loss": 0.9926, + "step": 7327 + }, + { + "epoch": 1.52, + "learning_rate": 8.049257103238236e-07, + "loss": 0.8908, + "step": 7328 + }, + { + "epoch": 1.52, + "learning_rate": 8.042555164909424e-07, + "loss": 0.7537, + "step": 7329 + }, + { + "epoch": 1.52, + "learning_rate": 8.03585558590649e-07, + "loss": 1.0474, + "step": 7330 + }, + { + "epoch": 1.52, + "learning_rate": 8.02915836694932e-07, + "loss": 0.7432, + "step": 7331 + }, + { + "epoch": 1.52, + "learning_rate": 8.022463508757501e-07, + "loss": 0.8103, + "step": 7332 + }, + { + "epoch": 1.53, + "learning_rate": 8.015771012050415e-07, + "loss": 0.9893, + "step": 7333 + }, + { + "epoch": 1.53, + "learning_rate": 8.00908087754716e-07, + "loss": 0.9498, + "step": 7334 + }, + { + "epoch": 1.53, + "learning_rate": 8.002393105966607e-07, + "loss": 0.8921, + "step": 7335 + }, + { + "epoch": 1.53, + "learning_rate": 7.995707698027335e-07, + "loss": 0.9996, + "step": 7336 + }, + { + "epoch": 1.53, + "learning_rate": 7.989024654447703e-07, + "loss": 0.9327, + "step": 7337 + }, + { + "epoch": 1.53, + "learning_rate": 7.982343975945797e-07, + "loss": 0.8578, + "step": 7338 + }, + { + "epoch": 1.53, + "learning_rate": 7.975665663239471e-07, + "loss": 1.0309, + "step": 7339 + }, + { + "epoch": 1.53, + "learning_rate": 7.96898971704629e-07, + "loss": 1.0022, + "step": 7340 + }, + { + "epoch": 1.53, + "learning_rate": 7.962316138083591e-07, + "loss": 1.2361, + "step": 7341 + }, + { + "epoch": 1.53, + "learning_rate": 7.95564492706845e-07, + "loss": 0.8481, + "step": 7342 + }, + { + "epoch": 1.53, + "learning_rate": 7.948976084717696e-07, + "loss": 0.7888, + "step": 7343 + }, + { + "epoch": 1.53, + "learning_rate": 7.942309611747881e-07, + "loss": 0.932, + "step": 7344 + }, + { + "epoch": 1.53, + "learning_rate": 7.935645508875324e-07, + "loss": 0.828, + "step": 7345 + }, + { + "epoch": 1.53, + "learning_rate": 7.928983776816084e-07, + "loss": 0.7522, + "step": 7346 + }, + { + "epoch": 1.53, + "learning_rate": 7.922324416285969e-07, + "loss": 0.9776, + "step": 7347 + }, + { + "epoch": 1.53, + "learning_rate": 7.915667428000514e-07, + "loss": 0.8516, + "step": 7348 + }, + { + "epoch": 1.53, + "learning_rate": 7.909012812675016e-07, + "loss": 0.8838, + "step": 7349 + }, + { + "epoch": 1.53, + "learning_rate": 7.902360571024514e-07, + "loss": 0.7456, + "step": 7350 + }, + { + "epoch": 1.53, + "learning_rate": 7.8957107037638e-07, + "loss": 0.9663, + "step": 7351 + }, + { + "epoch": 1.53, + "learning_rate": 7.889063211607389e-07, + "loss": 0.8825, + "step": 7352 + }, + { + "epoch": 1.53, + "learning_rate": 7.882418095269554e-07, + "loss": 0.8225, + "step": 7353 + }, + { + "epoch": 1.53, + "learning_rate": 7.875775355464319e-07, + "loss": 0.8814, + "step": 7354 + }, + { + "epoch": 1.53, + "learning_rate": 7.86913499290545e-07, + "loss": 0.8702, + "step": 7355 + }, + { + "epoch": 1.53, + "learning_rate": 7.86249700830643e-07, + "loss": 1.0794, + "step": 7356 + }, + { + "epoch": 1.53, + "learning_rate": 7.855861402380543e-07, + "loss": 0.8924, + "step": 7357 + }, + { + "epoch": 1.53, + "learning_rate": 7.84922817584076e-07, + "loss": 0.9579, + "step": 7358 + }, + { + "epoch": 1.53, + "learning_rate": 7.842597329399839e-07, + "loss": 0.9061, + "step": 7359 + }, + { + "epoch": 1.53, + "learning_rate": 7.835968863770235e-07, + "loss": 0.8381, + "step": 7360 + }, + { + "epoch": 1.53, + "learning_rate": 7.829342779664214e-07, + "loss": 0.8636, + "step": 7361 + }, + { + "epoch": 1.53, + "learning_rate": 7.822719077793721e-07, + "loss": 0.6912, + "step": 7362 + }, + { + "epoch": 1.53, + "learning_rate": 7.816097758870487e-07, + "loss": 0.8808, + "step": 7363 + }, + { + "epoch": 1.53, + "learning_rate": 7.809478823605951e-07, + "loss": 0.9122, + "step": 7364 + }, + { + "epoch": 1.53, + "learning_rate": 7.802862272711346e-07, + "loss": 1.0367, + "step": 7365 + }, + { + "epoch": 1.53, + "learning_rate": 7.796248106897598e-07, + "loss": 0.9266, + "step": 7366 + }, + { + "epoch": 1.53, + "learning_rate": 7.789636326875417e-07, + "loss": 0.9236, + "step": 7367 + }, + { + "epoch": 1.53, + "learning_rate": 7.783026933355208e-07, + "loss": 0.9301, + "step": 7368 + }, + { + "epoch": 1.53, + "learning_rate": 7.776419927047192e-07, + "loss": 0.8534, + "step": 7369 + }, + { + "epoch": 1.53, + "learning_rate": 7.769815308661254e-07, + "loss": 0.719, + "step": 7370 + }, + { + "epoch": 1.53, + "learning_rate": 7.763213078907087e-07, + "loss": 0.9282, + "step": 7371 + }, + { + "epoch": 1.53, + "learning_rate": 7.75661323849407e-07, + "loss": 0.999, + "step": 7372 + }, + { + "epoch": 1.53, + "learning_rate": 7.750015788131391e-07, + "loss": 0.9026, + "step": 7373 + }, + { + "epoch": 1.53, + "learning_rate": 7.743420728527917e-07, + "loss": 0.807, + "step": 7374 + }, + { + "epoch": 1.53, + "learning_rate": 7.73682806039231e-07, + "loss": 0.977, + "step": 7375 + }, + { + "epoch": 1.53, + "learning_rate": 7.730237784432919e-07, + "loss": 0.9897, + "step": 7376 + }, + { + "epoch": 1.53, + "learning_rate": 7.723649901357906e-07, + "loss": 0.9077, + "step": 7377 + }, + { + "epoch": 1.53, + "learning_rate": 7.717064411875113e-07, + "loss": 0.9175, + "step": 7378 + }, + { + "epoch": 1.53, + "learning_rate": 7.710481316692169e-07, + "loss": 0.873, + "step": 7379 + }, + { + "epoch": 1.53, + "learning_rate": 7.703900616516398e-07, + "loss": 1.0283, + "step": 7380 + }, + { + "epoch": 1.54, + "learning_rate": 7.697322312054934e-07, + "loss": 0.7552, + "step": 7381 + }, + { + "epoch": 1.54, + "learning_rate": 7.690746404014585e-07, + "loss": 0.9323, + "step": 7382 + }, + { + "epoch": 1.54, + "learning_rate": 7.684172893101955e-07, + "loss": 0.8412, + "step": 7383 + }, + { + "epoch": 1.54, + "learning_rate": 7.677601780023336e-07, + "loss": 0.8896, + "step": 7384 + }, + { + "epoch": 1.54, + "learning_rate": 7.671033065484828e-07, + "loss": 1.1749, + "step": 7385 + }, + { + "epoch": 1.54, + "learning_rate": 7.664466750192218e-07, + "loss": 0.9042, + "step": 7386 + }, + { + "epoch": 1.54, + "learning_rate": 7.657902834851069e-07, + "loss": 1.0901, + "step": 7387 + }, + { + "epoch": 1.54, + "learning_rate": 7.651341320166651e-07, + "loss": 1.088, + "step": 7388 + }, + { + "epoch": 1.54, + "learning_rate": 7.644782206844028e-07, + "loss": 1.1047, + "step": 7389 + }, + { + "epoch": 1.54, + "learning_rate": 7.638225495587954e-07, + "loss": 0.8567, + "step": 7390 + }, + { + "epoch": 1.54, + "learning_rate": 7.631671187102965e-07, + "loss": 1.1336, + "step": 7391 + }, + { + "epoch": 1.54, + "learning_rate": 7.625119282093292e-07, + "loss": 0.8438, + "step": 7392 + }, + { + "epoch": 1.54, + "learning_rate": 7.618569781262971e-07, + "loss": 0.8927, + "step": 7393 + }, + { + "epoch": 1.54, + "learning_rate": 7.612022685315723e-07, + "loss": 0.7267, + "step": 7394 + }, + { + "epoch": 1.54, + "learning_rate": 7.605477994955049e-07, + "loss": 0.8531, + "step": 7395 + }, + { + "epoch": 1.54, + "learning_rate": 7.598935710884149e-07, + "loss": 0.9113, + "step": 7396 + }, + { + "epoch": 1.54, + "learning_rate": 7.592395833806026e-07, + "loss": 0.8517, + "step": 7397 + }, + { + "epoch": 1.54, + "learning_rate": 7.585858364423364e-07, + "loss": 0.7279, + "step": 7398 + }, + { + "epoch": 1.54, + "learning_rate": 7.579323303438629e-07, + "loss": 0.8726, + "step": 7399 + }, + { + "epoch": 1.54, + "learning_rate": 7.572790651553987e-07, + "loss": 0.9937, + "step": 7400 + }, + { + "epoch": 1.54, + "learning_rate": 7.566260409471406e-07, + "loss": 0.8522, + "step": 7401 + }, + { + "epoch": 1.54, + "learning_rate": 7.559732577892536e-07, + "loss": 0.6691, + "step": 7402 + }, + { + "epoch": 1.54, + "learning_rate": 7.553207157518806e-07, + "loss": 1.1136, + "step": 7403 + }, + { + "epoch": 1.54, + "learning_rate": 7.546684149051349e-07, + "loss": 0.7998, + "step": 7404 + }, + { + "epoch": 1.54, + "learning_rate": 7.540163553191096e-07, + "loss": 0.9884, + "step": 7405 + }, + { + "epoch": 1.54, + "learning_rate": 7.533645370638656e-07, + "loss": 0.9285, + "step": 7406 + }, + { + "epoch": 1.54, + "learning_rate": 7.527129602094427e-07, + "loss": 0.7079, + "step": 7407 + }, + { + "epoch": 1.54, + "learning_rate": 7.5206162482585e-07, + "loss": 1.0801, + "step": 7408 + }, + { + "epoch": 1.54, + "learning_rate": 7.514105309830774e-07, + "loss": 0.8318, + "step": 7409 + }, + { + "epoch": 1.54, + "learning_rate": 7.507596787510818e-07, + "loss": 0.8021, + "step": 7410 + }, + { + "epoch": 1.54, + "learning_rate": 7.501090681997991e-07, + "loss": 1.1928, + "step": 7411 + }, + { + "epoch": 1.54, + "learning_rate": 7.494586993991345e-07, + "loss": 1.0073, + "step": 7412 + }, + { + "epoch": 1.54, + "learning_rate": 7.488085724189742e-07, + "loss": 0.6994, + "step": 7413 + }, + { + "epoch": 1.54, + "learning_rate": 7.481586873291709e-07, + "loss": 0.9753, + "step": 7414 + }, + { + "epoch": 1.54, + "learning_rate": 7.475090441995569e-07, + "loss": 0.9284, + "step": 7415 + }, + { + "epoch": 1.54, + "learning_rate": 7.468596430999336e-07, + "loss": 1.136, + "step": 7416 + }, + { + "epoch": 1.54, + "learning_rate": 7.462104841000828e-07, + "loss": 0.8784, + "step": 7417 + }, + { + "epoch": 1.54, + "learning_rate": 7.455615672697536e-07, + "loss": 0.8276, + "step": 7418 + }, + { + "epoch": 1.54, + "learning_rate": 7.449128926786743e-07, + "loss": 1.0349, + "step": 7419 + }, + { + "epoch": 1.54, + "learning_rate": 7.442644603965415e-07, + "loss": 0.8779, + "step": 7420 + }, + { + "epoch": 1.54, + "learning_rate": 7.436162704930336e-07, + "loss": 0.8655, + "step": 7421 + }, + { + "epoch": 1.54, + "learning_rate": 7.429683230377955e-07, + "loss": 0.9799, + "step": 7422 + }, + { + "epoch": 1.54, + "learning_rate": 7.423206181004499e-07, + "loss": 0.7304, + "step": 7423 + }, + { + "epoch": 1.54, + "learning_rate": 7.416731557505929e-07, + "loss": 0.8815, + "step": 7424 + }, + { + "epoch": 1.54, + "learning_rate": 7.410259360577951e-07, + "loss": 1.1001, + "step": 7425 + }, + { + "epoch": 1.54, + "learning_rate": 7.403789590915981e-07, + "loss": 0.928, + "step": 7426 + }, + { + "epoch": 1.54, + "learning_rate": 7.39732224921521e-07, + "loss": 0.9189, + "step": 7427 + }, + { + "epoch": 1.54, + "learning_rate": 7.390857336170549e-07, + "loss": 0.846, + "step": 7428 + }, + { + "epoch": 1.55, + "learning_rate": 7.384394852476665e-07, + "loss": 0.6707, + "step": 7429 + }, + { + "epoch": 1.55, + "learning_rate": 7.377934798827929e-07, + "loss": 0.9191, + "step": 7430 + }, + { + "epoch": 1.55, + "learning_rate": 7.371477175918486e-07, + "loss": 0.8146, + "step": 7431 + }, + { + "epoch": 1.55, + "learning_rate": 7.365021984442205e-07, + "loss": 0.9885, + "step": 7432 + }, + { + "epoch": 1.55, + "learning_rate": 7.358569225092704e-07, + "loss": 0.9329, + "step": 7433 + }, + { + "epoch": 1.55, + "learning_rate": 7.352118898563318e-07, + "loss": 0.9548, + "step": 7434 + }, + { + "epoch": 1.55, + "learning_rate": 7.345671005547139e-07, + "loss": 1.0497, + "step": 7435 + }, + { + "epoch": 1.55, + "learning_rate": 7.339225546736991e-07, + "loss": 0.9436, + "step": 7436 + }, + { + "epoch": 1.55, + "learning_rate": 7.332782522825453e-07, + "loss": 0.802, + "step": 7437 + }, + { + "epoch": 1.55, + "learning_rate": 7.326341934504806e-07, + "loss": 0.9326, + "step": 7438 + }, + { + "epoch": 1.55, + "learning_rate": 7.319903782467102e-07, + "loss": 0.7717, + "step": 7439 + }, + { + "epoch": 1.55, + "learning_rate": 7.313468067404117e-07, + "loss": 0.7179, + "step": 7440 + }, + { + "epoch": 1.55, + "learning_rate": 7.307034790007376e-07, + "loss": 0.7544, + "step": 7441 + }, + { + "epoch": 1.55, + "learning_rate": 7.300603950968122e-07, + "loss": 0.9263, + "step": 7442 + }, + { + "epoch": 1.55, + "learning_rate": 7.294175550977351e-07, + "loss": 0.8963, + "step": 7443 + }, + { + "epoch": 1.55, + "learning_rate": 7.287749590725797e-07, + "loss": 1.1683, + "step": 7444 + }, + { + "epoch": 1.55, + "learning_rate": 7.281326070903939e-07, + "loss": 0.9533, + "step": 7445 + }, + { + "epoch": 1.55, + "learning_rate": 7.274904992201967e-07, + "loss": 0.8576, + "step": 7446 + }, + { + "epoch": 1.55, + "learning_rate": 7.268486355309828e-07, + "loss": 0.938, + "step": 7447 + }, + { + "epoch": 1.55, + "learning_rate": 7.262070160917209e-07, + "loss": 0.8195, + "step": 7448 + }, + { + "epoch": 1.55, + "learning_rate": 7.255656409713534e-07, + "loss": 0.7969, + "step": 7449 + }, + { + "epoch": 1.55, + "learning_rate": 7.24924510238795e-07, + "loss": 0.7858, + "step": 7450 + }, + { + "epoch": 1.55, + "learning_rate": 7.242836239629358e-07, + "loss": 0.9187, + "step": 7451 + }, + { + "epoch": 1.55, + "learning_rate": 7.236429822126387e-07, + "loss": 0.7086, + "step": 7452 + }, + { + "epoch": 1.55, + "learning_rate": 7.230025850567413e-07, + "loss": 0.8162, + "step": 7453 + }, + { + "epoch": 1.55, + "learning_rate": 7.22362432564053e-07, + "loss": 0.8682, + "step": 7454 + }, + { + "epoch": 1.55, + "learning_rate": 7.217225248033586e-07, + "loss": 0.9228, + "step": 7455 + }, + { + "epoch": 1.55, + "learning_rate": 7.210828618434167e-07, + "loss": 0.888, + "step": 7456 + }, + { + "epoch": 1.55, + "learning_rate": 7.204434437529593e-07, + "loss": 0.6331, + "step": 7457 + }, + { + "epoch": 1.55, + "learning_rate": 7.198042706006905e-07, + "loss": 0.9279, + "step": 7458 + }, + { + "epoch": 1.55, + "learning_rate": 7.191653424552899e-07, + "loss": 0.7514, + "step": 7459 + }, + { + "epoch": 1.55, + "learning_rate": 7.185266593854104e-07, + "loss": 1.1746, + "step": 7460 + }, + { + "epoch": 1.55, + "learning_rate": 7.178882214596798e-07, + "loss": 0.8097, + "step": 7461 + }, + { + "epoch": 1.55, + "learning_rate": 7.17250028746696e-07, + "loss": 0.8571, + "step": 7462 + }, + { + "epoch": 1.55, + "learning_rate": 7.166120813150334e-07, + "loss": 0.8615, + "step": 7463 + }, + { + "epoch": 1.55, + "learning_rate": 7.1597437923324e-07, + "loss": 0.8783, + "step": 7464 + }, + { + "epoch": 1.55, + "learning_rate": 7.153369225698374e-07, + "loss": 0.9609, + "step": 7465 + }, + { + "epoch": 1.55, + "learning_rate": 7.146997113933184e-07, + "loss": 0.9675, + "step": 7466 + }, + { + "epoch": 1.55, + "learning_rate": 7.140627457721527e-07, + "loss": 0.8527, + "step": 7467 + }, + { + "epoch": 1.55, + "learning_rate": 7.134260257747813e-07, + "loss": 0.9077, + "step": 7468 + }, + { + "epoch": 1.55, + "learning_rate": 7.127895514696215e-07, + "loss": 1.0858, + "step": 7469 + }, + { + "epoch": 1.55, + "learning_rate": 7.121533229250603e-07, + "loss": 0.9302, + "step": 7470 + }, + { + "epoch": 1.55, + "learning_rate": 7.115173402094613e-07, + "loss": 1.0243, + "step": 7471 + }, + { + "epoch": 1.55, + "learning_rate": 7.108816033911607e-07, + "loss": 0.8831, + "step": 7472 + }, + { + "epoch": 1.55, + "learning_rate": 7.102461125384694e-07, + "loss": 0.9573, + "step": 7473 + }, + { + "epoch": 1.55, + "learning_rate": 7.09610867719669e-07, + "loss": 0.9398, + "step": 7474 + }, + { + "epoch": 1.55, + "learning_rate": 7.089758690030176e-07, + "loss": 0.9519, + "step": 7475 + }, + { + "epoch": 1.55, + "learning_rate": 7.083411164567456e-07, + "loss": 0.9624, + "step": 7476 + }, + { + "epoch": 1.56, + "learning_rate": 7.077066101490577e-07, + "loss": 0.8321, + "step": 7477 + }, + { + "epoch": 1.56, + "learning_rate": 7.070723501481303e-07, + "loss": 0.8352, + "step": 7478 + }, + { + "epoch": 1.56, + "learning_rate": 7.06438336522115e-07, + "loss": 0.8758, + "step": 7479 + }, + { + "epoch": 1.56, + "learning_rate": 7.05804569339137e-07, + "loss": 1.0054, + "step": 7480 + }, + { + "epoch": 1.56, + "learning_rate": 7.05171048667295e-07, + "loss": 0.8958, + "step": 7481 + }, + { + "epoch": 1.56, + "learning_rate": 7.045377745746597e-07, + "loss": 0.8075, + "step": 7482 + }, + { + "epoch": 1.56, + "learning_rate": 7.039047471292761e-07, + "loss": 0.8432, + "step": 7483 + }, + { + "epoch": 1.56, + "learning_rate": 7.032719663991638e-07, + "loss": 1.028, + "step": 7484 + }, + { + "epoch": 1.56, + "learning_rate": 7.02639432452316e-07, + "loss": 0.8788, + "step": 7485 + }, + { + "epoch": 1.56, + "learning_rate": 7.020071453566959e-07, + "loss": 0.8987, + "step": 7486 + }, + { + "epoch": 1.56, + "learning_rate": 7.013751051802444e-07, + "loss": 0.9782, + "step": 7487 + }, + { + "epoch": 1.56, + "learning_rate": 7.007433119908737e-07, + "loss": 0.9662, + "step": 7488 + }, + { + "epoch": 1.56, + "learning_rate": 7.001117658564707e-07, + "loss": 0.824, + "step": 7489 + }, + { + "epoch": 1.56, + "learning_rate": 6.994804668448939e-07, + "loss": 1.1885, + "step": 7490 + }, + { + "epoch": 1.56, + "learning_rate": 6.988494150239764e-07, + "loss": 0.8744, + "step": 7491 + }, + { + "epoch": 1.56, + "learning_rate": 6.982186104615253e-07, + "loss": 0.8674, + "step": 7492 + }, + { + "epoch": 1.56, + "learning_rate": 6.975880532253209e-07, + "loss": 0.9561, + "step": 7493 + }, + { + "epoch": 1.56, + "learning_rate": 6.969577433831152e-07, + "loss": 0.7053, + "step": 7494 + }, + { + "epoch": 1.56, + "learning_rate": 6.963276810026357e-07, + "loss": 0.7363, + "step": 7495 + }, + { + "epoch": 1.56, + "learning_rate": 6.95697866151582e-07, + "loss": 0.9197, + "step": 7496 + }, + { + "epoch": 1.56, + "learning_rate": 6.950682988976296e-07, + "loss": 0.7064, + "step": 7497 + }, + { + "epoch": 1.56, + "learning_rate": 6.944389793084228e-07, + "loss": 0.8171, + "step": 7498 + }, + { + "epoch": 1.56, + "learning_rate": 6.938099074515833e-07, + "loss": 0.8865, + "step": 7499 + }, + { + "epoch": 1.56, + "learning_rate": 6.931810833947047e-07, + "loss": 0.8677, + "step": 7500 + }, + { + "epoch": 1.56, + "learning_rate": 6.92552507205355e-07, + "loss": 0.8853, + "step": 7501 + }, + { + "epoch": 1.56, + "learning_rate": 6.919241789510728e-07, + "loss": 0.7557, + "step": 7502 + }, + { + "epoch": 1.56, + "learning_rate": 6.912960986993726e-07, + "loss": 0.841, + "step": 7503 + }, + { + "epoch": 1.56, + "learning_rate": 6.906682665177423e-07, + "loss": 0.9461, + "step": 7504 + }, + { + "epoch": 1.56, + "learning_rate": 6.900406824736424e-07, + "loss": 0.9437, + "step": 7505 + }, + { + "epoch": 1.56, + "learning_rate": 6.89413346634506e-07, + "loss": 0.8269, + "step": 7506 + }, + { + "epoch": 1.56, + "learning_rate": 6.887862590677401e-07, + "loss": 0.9287, + "step": 7507 + }, + { + "epoch": 1.56, + "learning_rate": 6.881594198407259e-07, + "loss": 1.0388, + "step": 7508 + }, + { + "epoch": 1.56, + "learning_rate": 6.87532829020818e-07, + "loss": 0.8535, + "step": 7509 + }, + { + "epoch": 1.56, + "learning_rate": 6.869064866753419e-07, + "loss": 0.9615, + "step": 7510 + }, + { + "epoch": 1.56, + "learning_rate": 6.862803928715985e-07, + "loss": 0.9567, + "step": 7511 + }, + { + "epoch": 1.56, + "learning_rate": 6.856545476768618e-07, + "loss": 0.8097, + "step": 7512 + }, + { + "epoch": 1.56, + "learning_rate": 6.850289511583798e-07, + "loss": 1.1117, + "step": 7513 + }, + { + "epoch": 1.56, + "learning_rate": 6.844036033833709e-07, + "loss": 0.8382, + "step": 7514 + }, + { + "epoch": 1.56, + "learning_rate": 6.837785044190295e-07, + "loss": 0.9604, + "step": 7515 + }, + { + "epoch": 1.56, + "learning_rate": 6.831536543325227e-07, + "loss": 0.9503, + "step": 7516 + }, + { + "epoch": 1.56, + "learning_rate": 6.825290531909912e-07, + "loss": 0.8373, + "step": 7517 + }, + { + "epoch": 1.56, + "learning_rate": 6.819047010615466e-07, + "loss": 0.8863, + "step": 7518 + }, + { + "epoch": 1.56, + "learning_rate": 6.812805980112766e-07, + "loss": 0.9506, + "step": 7519 + }, + { + "epoch": 1.56, + "learning_rate": 6.806567441072406e-07, + "loss": 0.9264, + "step": 7520 + }, + { + "epoch": 1.56, + "learning_rate": 6.800331394164731e-07, + "loss": 0.9686, + "step": 7521 + }, + { + "epoch": 1.56, + "learning_rate": 6.794097840059782e-07, + "loss": 0.8393, + "step": 7522 + }, + { + "epoch": 1.56, + "learning_rate": 6.787866779427366e-07, + "loss": 0.8671, + "step": 7523 + }, + { + "epoch": 1.56, + "learning_rate": 6.781638212937005e-07, + "loss": 0.9978, + "step": 7524 + }, + { + "epoch": 1.57, + "learning_rate": 6.775412141257972e-07, + "loss": 0.9438, + "step": 7525 + }, + { + "epoch": 1.57, + "learning_rate": 6.769188565059238e-07, + "loss": 0.9153, + "step": 7526 + }, + { + "epoch": 1.57, + "learning_rate": 6.762967485009534e-07, + "loss": 1.0263, + "step": 7527 + }, + { + "epoch": 1.57, + "learning_rate": 6.756748901777318e-07, + "loss": 1.0928, + "step": 7528 + }, + { + "epoch": 1.57, + "learning_rate": 6.750532816030782e-07, + "loss": 0.8947, + "step": 7529 + }, + { + "epoch": 1.57, + "learning_rate": 6.744319228437827e-07, + "loss": 0.8573, + "step": 7530 + }, + { + "epoch": 1.57, + "learning_rate": 6.73810813966611e-07, + "loss": 0.7961, + "step": 7531 + }, + { + "epoch": 1.57, + "learning_rate": 6.731899550383014e-07, + "loss": 0.6579, + "step": 7532 + }, + { + "epoch": 1.57, + "learning_rate": 6.725693461255661e-07, + "loss": 0.7531, + "step": 7533 + }, + { + "epoch": 1.57, + "learning_rate": 6.719489872950877e-07, + "loss": 1.1253, + "step": 7534 + }, + { + "epoch": 1.57, + "learning_rate": 6.713288786135243e-07, + "loss": 0.8856, + "step": 7535 + }, + { + "epoch": 1.57, + "learning_rate": 6.707090201475067e-07, + "loss": 0.8989, + "step": 7536 + }, + { + "epoch": 1.57, + "learning_rate": 6.700894119636395e-07, + "loss": 0.8987, + "step": 7537 + }, + { + "epoch": 1.57, + "learning_rate": 6.694700541284977e-07, + "loss": 0.9426, + "step": 7538 + }, + { + "epoch": 1.57, + "learning_rate": 6.688509467086327e-07, + "loss": 0.9201, + "step": 7539 + }, + { + "epoch": 1.57, + "learning_rate": 6.682320897705668e-07, + "loss": 0.9321, + "step": 7540 + }, + { + "epoch": 1.57, + "learning_rate": 6.676134833807976e-07, + "loss": 0.8725, + "step": 7541 + }, + { + "epoch": 1.57, + "learning_rate": 6.669951276057921e-07, + "loss": 0.797, + "step": 7542 + }, + { + "epoch": 1.57, + "learning_rate": 6.663770225119936e-07, + "loss": 0.925, + "step": 7543 + }, + { + "epoch": 1.57, + "learning_rate": 6.657591681658179e-07, + "loss": 1.0634, + "step": 7544 + }, + { + "epoch": 1.57, + "learning_rate": 6.651415646336536e-07, + "loss": 0.9552, + "step": 7545 + }, + { + "epoch": 1.57, + "learning_rate": 6.645242119818606e-07, + "loss": 0.9419, + "step": 7546 + }, + { + "epoch": 1.57, + "learning_rate": 6.639071102767745e-07, + "loss": 0.8971, + "step": 7547 + }, + { + "epoch": 1.57, + "learning_rate": 6.632902595847026e-07, + "loss": 0.7732, + "step": 7548 + }, + { + "epoch": 1.57, + "learning_rate": 6.626736599719267e-07, + "loss": 0.792, + "step": 7549 + }, + { + "epoch": 1.57, + "learning_rate": 6.62057311504698e-07, + "loss": 0.7365, + "step": 7550 + }, + { + "epoch": 1.57, + "learning_rate": 6.614412142492446e-07, + "loss": 1.0805, + "step": 7551 + }, + { + "epoch": 1.57, + "learning_rate": 6.608253682717667e-07, + "loss": 1.0099, + "step": 7552 + }, + { + "epoch": 1.57, + "learning_rate": 6.602097736384345e-07, + "loss": 0.9444, + "step": 7553 + }, + { + "epoch": 1.57, + "learning_rate": 6.595944304153964e-07, + "loss": 0.9189, + "step": 7554 + }, + { + "epoch": 1.57, + "learning_rate": 6.589793386687691e-07, + "loss": 0.6577, + "step": 7555 + }, + { + "epoch": 1.57, + "learning_rate": 6.583644984646459e-07, + "loss": 0.825, + "step": 7556 + }, + { + "epoch": 1.57, + "learning_rate": 6.577499098690885e-07, + "loss": 0.8252, + "step": 7557 + }, + { + "epoch": 1.57, + "learning_rate": 6.571355729481378e-07, + "loss": 0.7697, + "step": 7558 + }, + { + "epoch": 1.57, + "learning_rate": 6.565214877678014e-07, + "loss": 0.8379, + "step": 7559 + }, + { + "epoch": 1.57, + "learning_rate": 6.55907654394065e-07, + "loss": 0.9417, + "step": 7560 + }, + { + "epoch": 1.57, + "learning_rate": 6.552940728928823e-07, + "loss": 1.0009, + "step": 7561 + }, + { + "epoch": 1.57, + "learning_rate": 6.546807433301858e-07, + "loss": 0.9677, + "step": 7562 + }, + { + "epoch": 1.57, + "learning_rate": 6.540676657718752e-07, + "loss": 0.7718, + "step": 7563 + }, + { + "epoch": 1.57, + "learning_rate": 6.534548402838275e-07, + "loss": 0.7881, + "step": 7564 + }, + { + "epoch": 1.57, + "learning_rate": 6.528422669318876e-07, + "loss": 1.0669, + "step": 7565 + }, + { + "epoch": 1.57, + "learning_rate": 6.522299457818806e-07, + "loss": 0.7585, + "step": 7566 + }, + { + "epoch": 1.57, + "learning_rate": 6.516178768995975e-07, + "loss": 1.0751, + "step": 7567 + }, + { + "epoch": 1.57, + "learning_rate": 6.510060603508071e-07, + "loss": 0.962, + "step": 7568 + }, + { + "epoch": 1.57, + "learning_rate": 6.503944962012459e-07, + "loss": 0.7428, + "step": 7569 + }, + { + "epoch": 1.57, + "learning_rate": 6.497831845166304e-07, + "loss": 1.2438, + "step": 7570 + }, + { + "epoch": 1.57, + "learning_rate": 6.491721253626428e-07, + "loss": 1.0361, + "step": 7571 + }, + { + "epoch": 1.57, + "learning_rate": 6.485613188049436e-07, + "loss": 0.8778, + "step": 7572 + }, + { + "epoch": 1.58, + "learning_rate": 6.479507649091609e-07, + "loss": 1.1807, + "step": 7573 + }, + { + "epoch": 1.58, + "learning_rate": 6.47340463740903e-07, + "loss": 0.8352, + "step": 7574 + }, + { + "epoch": 1.58, + "learning_rate": 6.467304153657431e-07, + "loss": 1.153, + "step": 7575 + }, + { + "epoch": 1.58, + "learning_rate": 6.461206198492329e-07, + "loss": 0.7898, + "step": 7576 + }, + { + "epoch": 1.58, + "learning_rate": 6.455110772568927e-07, + "loss": 0.8553, + "step": 7577 + }, + { + "epoch": 1.58, + "learning_rate": 6.449017876542209e-07, + "loss": 0.8296, + "step": 7578 + }, + { + "epoch": 1.58, + "learning_rate": 6.44292751106683e-07, + "loss": 0.6918, + "step": 7579 + }, + { + "epoch": 1.58, + "learning_rate": 6.436839676797218e-07, + "loss": 0.9482, + "step": 7580 + }, + { + "epoch": 1.58, + "learning_rate": 6.430754374387485e-07, + "loss": 0.943, + "step": 7581 + }, + { + "epoch": 1.58, + "learning_rate": 6.424671604491529e-07, + "loss": 0.8859, + "step": 7582 + }, + { + "epoch": 1.58, + "learning_rate": 6.418591367762919e-07, + "loss": 1.0033, + "step": 7583 + }, + { + "epoch": 1.58, + "learning_rate": 6.41251366485499e-07, + "loss": 0.764, + "step": 7584 + }, + { + "epoch": 1.58, + "learning_rate": 6.406438496420768e-07, + "loss": 0.9505, + "step": 7585 + }, + { + "epoch": 1.58, + "learning_rate": 6.400365863113067e-07, + "loss": 0.7624, + "step": 7586 + }, + { + "epoch": 1.58, + "learning_rate": 6.394295765584356e-07, + "loss": 0.807, + "step": 7587 + }, + { + "epoch": 1.58, + "learning_rate": 6.388228204486894e-07, + "loss": 0.9547, + "step": 7588 + }, + { + "epoch": 1.58, + "learning_rate": 6.382163180472604e-07, + "loss": 0.9457, + "step": 7589 + }, + { + "epoch": 1.58, + "learning_rate": 6.376100694193218e-07, + "loss": 0.9385, + "step": 7590 + }, + { + "epoch": 1.58, + "learning_rate": 6.370040746300112e-07, + "loss": 1.0483, + "step": 7591 + }, + { + "epoch": 1.58, + "learning_rate": 6.363983337444453e-07, + "loss": 0.956, + "step": 7592 + }, + { + "epoch": 1.58, + "learning_rate": 6.357928468277084e-07, + "loss": 1.0805, + "step": 7593 + }, + { + "epoch": 1.58, + "learning_rate": 6.351876139448627e-07, + "loss": 0.9277, + "step": 7594 + }, + { + "epoch": 1.58, + "learning_rate": 6.345826351609384e-07, + "loss": 0.8721, + "step": 7595 + }, + { + "epoch": 1.58, + "learning_rate": 6.339779105409419e-07, + "loss": 0.9782, + "step": 7596 + }, + { + "epoch": 1.58, + "learning_rate": 6.333734401498486e-07, + "loss": 0.9442, + "step": 7597 + }, + { + "epoch": 1.58, + "learning_rate": 6.327692240526117e-07, + "loss": 0.9312, + "step": 7598 + }, + { + "epoch": 1.58, + "learning_rate": 6.321652623141522e-07, + "loss": 0.7836, + "step": 7599 + }, + { + "epoch": 1.58, + "learning_rate": 6.315615549993671e-07, + "loss": 0.8885, + "step": 7600 + }, + { + "epoch": 1.58, + "learning_rate": 6.309581021731219e-07, + "loss": 1.1486, + "step": 7601 + }, + { + "epoch": 1.58, + "learning_rate": 6.303549039002614e-07, + "loss": 0.8918, + "step": 7602 + }, + { + "epoch": 1.58, + "learning_rate": 6.297519602455968e-07, + "loss": 0.7073, + "step": 7603 + }, + { + "epoch": 1.58, + "learning_rate": 6.291492712739156e-07, + "loss": 0.779, + "step": 7604 + }, + { + "epoch": 1.58, + "learning_rate": 6.285468370499742e-07, + "loss": 0.8643, + "step": 7605 + }, + { + "epoch": 1.58, + "learning_rate": 6.27944657638508e-07, + "loss": 0.9395, + "step": 7606 + }, + { + "epoch": 1.58, + "learning_rate": 6.273427331042178e-07, + "loss": 0.889, + "step": 7607 + }, + { + "epoch": 1.58, + "learning_rate": 6.267410635117826e-07, + "loss": 0.9408, + "step": 7608 + }, + { + "epoch": 1.58, + "learning_rate": 6.261396489258492e-07, + "loss": 1.0832, + "step": 7609 + }, + { + "epoch": 1.58, + "learning_rate": 6.255384894110427e-07, + "loss": 0.9211, + "step": 7610 + }, + { + "epoch": 1.58, + "learning_rate": 6.249375850319552e-07, + "loss": 0.8805, + "step": 7611 + }, + { + "epoch": 1.58, + "learning_rate": 6.243369358531556e-07, + "loss": 0.9317, + "step": 7612 + }, + { + "epoch": 1.58, + "learning_rate": 6.237365419391812e-07, + "loss": 1.0597, + "step": 7613 + }, + { + "epoch": 1.58, + "learning_rate": 6.231364033545475e-07, + "loss": 0.9285, + "step": 7614 + }, + { + "epoch": 1.58, + "learning_rate": 6.225365201637365e-07, + "loss": 1.0229, + "step": 7615 + }, + { + "epoch": 1.58, + "learning_rate": 6.219368924312078e-07, + "loss": 0.9293, + "step": 7616 + }, + { + "epoch": 1.58, + "learning_rate": 6.213375202213887e-07, + "loss": 0.8987, + "step": 7617 + }, + { + "epoch": 1.58, + "learning_rate": 6.207384035986847e-07, + "loss": 1.0629, + "step": 7618 + }, + { + "epoch": 1.58, + "learning_rate": 6.201395426274687e-07, + "loss": 0.9628, + "step": 7619 + }, + { + "epoch": 1.58, + "learning_rate": 6.195409373720893e-07, + "loss": 0.8356, + "step": 7620 + }, + { + "epoch": 1.59, + "learning_rate": 6.189425878968662e-07, + "loss": 0.7758, + "step": 7621 + }, + { + "epoch": 1.59, + "learning_rate": 6.183444942660928e-07, + "loss": 0.819, + "step": 7622 + }, + { + "epoch": 1.59, + "learning_rate": 6.177466565440328e-07, + "loss": 1.0032, + "step": 7623 + }, + { + "epoch": 1.59, + "learning_rate": 6.171490747949245e-07, + "loss": 0.8427, + "step": 7624 + }, + { + "epoch": 1.59, + "learning_rate": 6.165517490829781e-07, + "loss": 0.7415, + "step": 7625 + }, + { + "epoch": 1.59, + "learning_rate": 6.159546794723771e-07, + "loss": 0.7944, + "step": 7626 + }, + { + "epoch": 1.59, + "learning_rate": 6.153578660272747e-07, + "loss": 1.0042, + "step": 7627 + }, + { + "epoch": 1.59, + "learning_rate": 6.147613088117996e-07, + "loss": 1.0276, + "step": 7628 + }, + { + "epoch": 1.59, + "learning_rate": 6.141650078900514e-07, + "loss": 0.8412, + "step": 7629 + }, + { + "epoch": 1.59, + "learning_rate": 6.135689633261038e-07, + "loss": 0.8229, + "step": 7630 + }, + { + "epoch": 1.59, + "learning_rate": 6.129731751840003e-07, + "loss": 0.9255, + "step": 7631 + }, + { + "epoch": 1.59, + "learning_rate": 6.123776435277583e-07, + "loss": 0.8498, + "step": 7632 + }, + { + "epoch": 1.59, + "learning_rate": 6.117823684213685e-07, + "loss": 0.8842, + "step": 7633 + }, + { + "epoch": 1.59, + "learning_rate": 6.111873499287935e-07, + "loss": 0.8557, + "step": 7634 + }, + { + "epoch": 1.59, + "learning_rate": 6.105925881139664e-07, + "loss": 0.8867, + "step": 7635 + }, + { + "epoch": 1.59, + "learning_rate": 6.09998083040795e-07, + "loss": 0.9138, + "step": 7636 + }, + { + "epoch": 1.59, + "learning_rate": 6.094038347731596e-07, + "loss": 0.8848, + "step": 7637 + }, + { + "epoch": 1.59, + "learning_rate": 6.088098433749117e-07, + "loss": 0.727, + "step": 7638 + }, + { + "epoch": 1.59, + "learning_rate": 6.082161089098753e-07, + "loss": 0.7655, + "step": 7639 + }, + { + "epoch": 1.59, + "learning_rate": 6.07622631441847e-07, + "loss": 0.8224, + "step": 7640 + }, + { + "epoch": 1.59, + "learning_rate": 6.070294110345961e-07, + "loss": 0.8282, + "step": 7641 + }, + { + "epoch": 1.59, + "learning_rate": 6.064364477518655e-07, + "loss": 0.9875, + "step": 7642 + }, + { + "epoch": 1.59, + "learning_rate": 6.058437416573664e-07, + "loss": 0.7536, + "step": 7643 + }, + { + "epoch": 1.59, + "learning_rate": 6.052512928147869e-07, + "loss": 0.8827, + "step": 7644 + }, + { + "epoch": 1.59, + "learning_rate": 6.046591012877848e-07, + "loss": 1.0034, + "step": 7645 + }, + { + "epoch": 1.59, + "learning_rate": 6.040671671399923e-07, + "loss": 0.8943, + "step": 7646 + }, + { + "epoch": 1.59, + "learning_rate": 6.03475490435011e-07, + "loss": 1.0488, + "step": 7647 + }, + { + "epoch": 1.59, + "learning_rate": 6.028840712364171e-07, + "loss": 0.786, + "step": 7648 + }, + { + "epoch": 1.59, + "learning_rate": 6.022929096077586e-07, + "loss": 0.6305, + "step": 7649 + }, + { + "epoch": 1.59, + "learning_rate": 6.017020056125567e-07, + "loss": 0.9802, + "step": 7650 + }, + { + "epoch": 1.59, + "learning_rate": 6.011113593143027e-07, + "loss": 0.8576, + "step": 7651 + }, + { + "epoch": 1.59, + "learning_rate": 6.005209707764615e-07, + "loss": 0.8194, + "step": 7652 + }, + { + "epoch": 1.59, + "learning_rate": 5.99930840062471e-07, + "loss": 1.0372, + "step": 7653 + }, + { + "epoch": 1.59, + "learning_rate": 5.993409672357414e-07, + "loss": 1.059, + "step": 7654 + }, + { + "epoch": 1.59, + "learning_rate": 5.987513523596526e-07, + "loss": 0.7694, + "step": 7655 + }, + { + "epoch": 1.59, + "learning_rate": 5.981619954975597e-07, + "loss": 0.869, + "step": 7656 + }, + { + "epoch": 1.59, + "learning_rate": 5.97572896712789e-07, + "loss": 0.7695, + "step": 7657 + }, + { + "epoch": 1.59, + "learning_rate": 5.969840560686397e-07, + "loss": 0.8373, + "step": 7658 + }, + { + "epoch": 1.59, + "learning_rate": 5.963954736283815e-07, + "loss": 0.8951, + "step": 7659 + }, + { + "epoch": 1.59, + "learning_rate": 5.958071494552578e-07, + "loss": 1.2095, + "step": 7660 + }, + { + "epoch": 1.59, + "learning_rate": 5.952190836124847e-07, + "loss": 0.837, + "step": 7661 + }, + { + "epoch": 1.59, + "learning_rate": 5.946312761632501e-07, + "loss": 0.865, + "step": 7662 + }, + { + "epoch": 1.59, + "learning_rate": 5.940437271707126e-07, + "loss": 0.6568, + "step": 7663 + }, + { + "epoch": 1.59, + "learning_rate": 5.934564366980049e-07, + "loss": 1.0285, + "step": 7664 + }, + { + "epoch": 1.59, + "learning_rate": 5.928694048082313e-07, + "loss": 1.1463, + "step": 7665 + }, + { + "epoch": 1.59, + "learning_rate": 5.922826315644692e-07, + "loss": 0.7961, + "step": 7666 + }, + { + "epoch": 1.59, + "learning_rate": 5.916961170297658e-07, + "loss": 0.7931, + "step": 7667 + }, + { + "epoch": 1.59, + "learning_rate": 5.911098612671427e-07, + "loss": 0.7146, + "step": 7668 + }, + { + "epoch": 1.6, + "learning_rate": 5.90523864339593e-07, + "loss": 0.842, + "step": 7669 + }, + { + "epoch": 1.6, + "learning_rate": 5.899381263100831e-07, + "loss": 0.9911, + "step": 7670 + }, + { + "epoch": 1.6, + "learning_rate": 5.893526472415487e-07, + "loss": 0.9649, + "step": 7671 + }, + { + "epoch": 1.6, + "learning_rate": 5.887674271969003e-07, + "loss": 0.96, + "step": 7672 + }, + { + "epoch": 1.6, + "learning_rate": 5.881824662390197e-07, + "loss": 1.0115, + "step": 7673 + }, + { + "epoch": 1.6, + "learning_rate": 5.875977644307618e-07, + "loss": 1.0381, + "step": 7674 + }, + { + "epoch": 1.6, + "learning_rate": 5.870133218349515e-07, + "loss": 0.7622, + "step": 7675 + }, + { + "epoch": 1.6, + "learning_rate": 5.864291385143871e-07, + "loss": 0.8103, + "step": 7676 + }, + { + "epoch": 1.6, + "learning_rate": 5.8584521453184e-07, + "loss": 0.8705, + "step": 7677 + }, + { + "epoch": 1.6, + "learning_rate": 5.852615499500529e-07, + "loss": 1.0694, + "step": 7678 + }, + { + "epoch": 1.6, + "learning_rate": 5.846781448317391e-07, + "loss": 1.0454, + "step": 7679 + }, + { + "epoch": 1.6, + "learning_rate": 5.840949992395862e-07, + "loss": 0.8624, + "step": 7680 + }, + { + "epoch": 1.6, + "learning_rate": 5.835121132362534e-07, + "loss": 1.0411, + "step": 7681 + }, + { + "epoch": 1.6, + "learning_rate": 5.829294868843727e-07, + "loss": 1.1551, + "step": 7682 + }, + { + "epoch": 1.6, + "learning_rate": 5.823471202465448e-07, + "loss": 0.9614, + "step": 7683 + }, + { + "epoch": 1.6, + "learning_rate": 5.817650133853465e-07, + "loss": 0.9048, + "step": 7684 + }, + { + "epoch": 1.6, + "learning_rate": 5.81183166363325e-07, + "loss": 0.8626, + "step": 7685 + }, + { + "epoch": 1.6, + "learning_rate": 5.806015792430007e-07, + "loss": 1.0661, + "step": 7686 + }, + { + "epoch": 1.6, + "learning_rate": 5.800202520868623e-07, + "loss": 0.9711, + "step": 7687 + }, + { + "epoch": 1.6, + "learning_rate": 5.794391849573763e-07, + "loss": 0.7792, + "step": 7688 + }, + { + "epoch": 1.6, + "learning_rate": 5.788583779169771e-07, + "loss": 1.0243, + "step": 7689 + }, + { + "epoch": 1.6, + "learning_rate": 5.782778310280728e-07, + "loss": 0.8962, + "step": 7690 + }, + { + "epoch": 1.6, + "learning_rate": 5.776975443530409e-07, + "loss": 1.2037, + "step": 7691 + }, + { + "epoch": 1.6, + "learning_rate": 5.77117517954237e-07, + "loss": 0.8311, + "step": 7692 + }, + { + "epoch": 1.6, + "learning_rate": 5.76537751893982e-07, + "loss": 0.956, + "step": 7693 + }, + { + "epoch": 1.6, + "learning_rate": 5.759582462345734e-07, + "loss": 0.8604, + "step": 7694 + }, + { + "epoch": 1.6, + "learning_rate": 5.753790010382768e-07, + "loss": 1.0829, + "step": 7695 + }, + { + "epoch": 1.6, + "learning_rate": 5.748000163673348e-07, + "loss": 1.0475, + "step": 7696 + }, + { + "epoch": 1.6, + "learning_rate": 5.742212922839574e-07, + "loss": 0.9868, + "step": 7697 + }, + { + "epoch": 1.6, + "learning_rate": 5.736428288503301e-07, + "loss": 0.7862, + "step": 7698 + }, + { + "epoch": 1.6, + "learning_rate": 5.730646261286057e-07, + "loss": 0.7392, + "step": 7699 + }, + { + "epoch": 1.6, + "learning_rate": 5.724866841809155e-07, + "loss": 0.9327, + "step": 7700 + }, + { + "epoch": 1.6, + "learning_rate": 5.719090030693576e-07, + "loss": 0.8517, + "step": 7701 + }, + { + "epoch": 1.6, + "learning_rate": 5.713315828560046e-07, + "loss": 0.991, + "step": 7702 + }, + { + "epoch": 1.6, + "learning_rate": 5.707544236028981e-07, + "loss": 0.9132, + "step": 7703 + }, + { + "epoch": 1.6, + "learning_rate": 5.701775253720572e-07, + "loss": 1.1499, + "step": 7704 + }, + { + "epoch": 1.6, + "learning_rate": 5.696008882254668e-07, + "loss": 0.8372, + "step": 7705 + }, + { + "epoch": 1.6, + "learning_rate": 5.690245122250884e-07, + "loss": 0.8291, + "step": 7706 + }, + { + "epoch": 1.6, + "learning_rate": 5.68448397432851e-07, + "loss": 0.824, + "step": 7707 + }, + { + "epoch": 1.6, + "learning_rate": 5.678725439106613e-07, + "loss": 0.811, + "step": 7708 + }, + { + "epoch": 1.6, + "learning_rate": 5.672969517203927e-07, + "loss": 0.8469, + "step": 7709 + }, + { + "epoch": 1.6, + "learning_rate": 5.667216209238934e-07, + "loss": 0.7224, + "step": 7710 + }, + { + "epoch": 1.6, + "learning_rate": 5.661465515829814e-07, + "loss": 0.8492, + "step": 7711 + }, + { + "epoch": 1.6, + "learning_rate": 5.655717437594497e-07, + "loss": 1.2933, + "step": 7712 + }, + { + "epoch": 1.6, + "learning_rate": 5.6499719751506e-07, + "loss": 1.0846, + "step": 7713 + }, + { + "epoch": 1.6, + "learning_rate": 5.644229129115482e-07, + "loss": 0.7748, + "step": 7714 + }, + { + "epoch": 1.6, + "learning_rate": 5.638488900106192e-07, + "loss": 0.8871, + "step": 7715 + }, + { + "epoch": 1.6, + "learning_rate": 5.632751288739546e-07, + "loss": 0.7457, + "step": 7716 + }, + { + "epoch": 1.61, + "learning_rate": 5.62701629563203e-07, + "loss": 0.7554, + "step": 7717 + }, + { + "epoch": 1.61, + "learning_rate": 5.621283921399878e-07, + "loss": 0.7913, + "step": 7718 + }, + { + "epoch": 1.61, + "learning_rate": 5.615554166659016e-07, + "loss": 0.6993, + "step": 7719 + }, + { + "epoch": 1.61, + "learning_rate": 5.609827032025136e-07, + "loss": 0.8644, + "step": 7720 + }, + { + "epoch": 1.61, + "learning_rate": 5.604102518113588e-07, + "loss": 0.7616, + "step": 7721 + }, + { + "epoch": 1.61, + "learning_rate": 5.598380625539493e-07, + "loss": 0.9878, + "step": 7722 + }, + { + "epoch": 1.61, + "learning_rate": 5.592661354917644e-07, + "loss": 1.0197, + "step": 7723 + }, + { + "epoch": 1.61, + "learning_rate": 5.586944706862609e-07, + "loss": 0.872, + "step": 7724 + }, + { + "epoch": 1.61, + "learning_rate": 5.581230681988612e-07, + "loss": 0.8438, + "step": 7725 + }, + { + "epoch": 1.61, + "learning_rate": 5.575519280909644e-07, + "loss": 0.728, + "step": 7726 + }, + { + "epoch": 1.61, + "learning_rate": 5.56981050423937e-07, + "loss": 0.9545, + "step": 7727 + }, + { + "epoch": 1.61, + "learning_rate": 5.564104352591232e-07, + "loss": 0.8749, + "step": 7728 + }, + { + "epoch": 1.61, + "learning_rate": 5.55840082657833e-07, + "loss": 0.9407, + "step": 7729 + }, + { + "epoch": 1.61, + "learning_rate": 5.552699926813519e-07, + "loss": 0.9454, + "step": 7730 + }, + { + "epoch": 1.61, + "learning_rate": 5.547001653909341e-07, + "loss": 0.926, + "step": 7731 + }, + { + "epoch": 1.61, + "learning_rate": 5.54130600847811e-07, + "loss": 0.7255, + "step": 7732 + }, + { + "epoch": 1.61, + "learning_rate": 5.535612991131795e-07, + "loss": 0.8992, + "step": 7733 + }, + { + "epoch": 1.61, + "learning_rate": 5.529922602482122e-07, + "loss": 0.6745, + "step": 7734 + }, + { + "epoch": 1.61, + "learning_rate": 5.524234843140505e-07, + "loss": 0.9892, + "step": 7735 + }, + { + "epoch": 1.61, + "learning_rate": 5.518549713718127e-07, + "loss": 0.7968, + "step": 7736 + }, + { + "epoch": 1.61, + "learning_rate": 5.512867214825823e-07, + "loss": 0.8141, + "step": 7737 + }, + { + "epoch": 1.61, + "learning_rate": 5.507187347074201e-07, + "loss": 0.9462, + "step": 7738 + }, + { + "epoch": 1.61, + "learning_rate": 5.501510111073532e-07, + "loss": 0.8778, + "step": 7739 + }, + { + "epoch": 1.61, + "learning_rate": 5.49583550743387e-07, + "loss": 1.0532, + "step": 7740 + }, + { + "epoch": 1.61, + "learning_rate": 5.490163536764925e-07, + "loss": 0.8353, + "step": 7741 + }, + { + "epoch": 1.61, + "learning_rate": 5.484494199676168e-07, + "loss": 0.9773, + "step": 7742 + }, + { + "epoch": 1.61, + "learning_rate": 5.478827496776747e-07, + "loss": 0.922, + "step": 7743 + }, + { + "epoch": 1.61, + "learning_rate": 5.473163428675573e-07, + "loss": 0.8361, + "step": 7744 + }, + { + "epoch": 1.61, + "learning_rate": 5.467501995981234e-07, + "loss": 1.0802, + "step": 7745 + }, + { + "epoch": 1.61, + "learning_rate": 5.461843199302059e-07, + "loss": 0.8799, + "step": 7746 + }, + { + "epoch": 1.61, + "learning_rate": 5.45618703924607e-07, + "loss": 1.0208, + "step": 7747 + }, + { + "epoch": 1.61, + "learning_rate": 5.450533516421045e-07, + "loss": 0.7965, + "step": 7748 + }, + { + "epoch": 1.61, + "learning_rate": 5.444882631434434e-07, + "loss": 0.8207, + "step": 7749 + }, + { + "epoch": 1.61, + "learning_rate": 5.439234384893442e-07, + "loss": 0.7703, + "step": 7750 + }, + { + "epoch": 1.61, + "learning_rate": 5.433588777404945e-07, + "loss": 0.7776, + "step": 7751 + }, + { + "epoch": 1.61, + "learning_rate": 5.427945809575598e-07, + "loss": 0.7275, + "step": 7752 + }, + { + "epoch": 1.61, + "learning_rate": 5.422305482011709e-07, + "loss": 0.9844, + "step": 7753 + }, + { + "epoch": 1.61, + "learning_rate": 5.416667795319346e-07, + "loss": 0.7748, + "step": 7754 + }, + { + "epoch": 1.61, + "learning_rate": 5.411032750104272e-07, + "loss": 0.8922, + "step": 7755 + }, + { + "epoch": 1.61, + "learning_rate": 5.405400346971981e-07, + "loss": 1.0189, + "step": 7756 + }, + { + "epoch": 1.61, + "learning_rate": 5.399770586527663e-07, + "loss": 0.8838, + "step": 7757 + }, + { + "epoch": 1.61, + "learning_rate": 5.394143469376241e-07, + "loss": 0.9165, + "step": 7758 + }, + { + "epoch": 1.61, + "learning_rate": 5.388518996122345e-07, + "loss": 0.9276, + "step": 7759 + }, + { + "epoch": 1.61, + "learning_rate": 5.382897167370335e-07, + "loss": 0.9046, + "step": 7760 + }, + { + "epoch": 1.61, + "learning_rate": 5.37727798372426e-07, + "loss": 0.949, + "step": 7761 + }, + { + "epoch": 1.61, + "learning_rate": 5.371661445787907e-07, + "loss": 1.0182, + "step": 7762 + }, + { + "epoch": 1.61, + "learning_rate": 5.366047554164775e-07, + "loss": 0.9391, + "step": 7763 + }, + { + "epoch": 1.61, + "learning_rate": 5.360436309458085e-07, + "loss": 1.0426, + "step": 7764 + }, + { + "epoch": 1.62, + "learning_rate": 5.354827712270747e-07, + "loss": 1.1461, + "step": 7765 + }, + { + "epoch": 1.62, + "learning_rate": 5.34922176320541e-07, + "loss": 1.0342, + "step": 7766 + }, + { + "epoch": 1.62, + "learning_rate": 5.343618462864439e-07, + "loss": 1.1428, + "step": 7767 + }, + { + "epoch": 1.62, + "learning_rate": 5.33801781184991e-07, + "loss": 0.8704, + "step": 7768 + }, + { + "epoch": 1.62, + "learning_rate": 5.332419810763601e-07, + "loss": 1.0631, + "step": 7769 + }, + { + "epoch": 1.62, + "learning_rate": 5.326824460207022e-07, + "loss": 0.8724, + "step": 7770 + }, + { + "epoch": 1.62, + "learning_rate": 5.321231760781397e-07, + "loss": 1.0705, + "step": 7771 + }, + { + "epoch": 1.62, + "learning_rate": 5.315641713087662e-07, + "loss": 0.9096, + "step": 7772 + }, + { + "epoch": 1.62, + "learning_rate": 5.310054317726459e-07, + "loss": 0.9032, + "step": 7773 + }, + { + "epoch": 1.62, + "learning_rate": 5.304469575298154e-07, + "loss": 0.8894, + "step": 7774 + }, + { + "epoch": 1.62, + "learning_rate": 5.298887486402834e-07, + "loss": 0.8002, + "step": 7775 + }, + { + "epoch": 1.62, + "learning_rate": 5.293308051640296e-07, + "loss": 0.9611, + "step": 7776 + }, + { + "epoch": 1.62, + "learning_rate": 5.28773127161004e-07, + "loss": 0.8668, + "step": 7777 + }, + { + "epoch": 1.62, + "learning_rate": 5.282157146911295e-07, + "loss": 1.0063, + "step": 7778 + }, + { + "epoch": 1.62, + "learning_rate": 5.276585678142999e-07, + "loss": 0.7314, + "step": 7779 + }, + { + "epoch": 1.62, + "learning_rate": 5.271016865903816e-07, + "loss": 1.1008, + "step": 7780 + }, + { + "epoch": 1.62, + "learning_rate": 5.265450710792102e-07, + "loss": 0.8581, + "step": 7781 + }, + { + "epoch": 1.62, + "learning_rate": 5.259887213405939e-07, + "loss": 0.8065, + "step": 7782 + }, + { + "epoch": 1.62, + "learning_rate": 5.254326374343133e-07, + "loss": 0.8406, + "step": 7783 + }, + { + "epoch": 1.62, + "learning_rate": 5.248768194201196e-07, + "loss": 0.7957, + "step": 7784 + }, + { + "epoch": 1.62, + "learning_rate": 5.243212673577345e-07, + "loss": 0.7749, + "step": 7785 + }, + { + "epoch": 1.62, + "learning_rate": 5.237659813068525e-07, + "loss": 0.793, + "step": 7786 + }, + { + "epoch": 1.62, + "learning_rate": 5.232109613271393e-07, + "loss": 1.1189, + "step": 7787 + }, + { + "epoch": 1.62, + "learning_rate": 5.226562074782322e-07, + "loss": 0.9058, + "step": 7788 + }, + { + "epoch": 1.62, + "learning_rate": 5.221017198197381e-07, + "loss": 0.9853, + "step": 7789 + }, + { + "epoch": 1.62, + "learning_rate": 5.215474984112374e-07, + "loss": 0.9841, + "step": 7790 + }, + { + "epoch": 1.62, + "learning_rate": 5.209935433122811e-07, + "loss": 1.0752, + "step": 7791 + }, + { + "epoch": 1.62, + "learning_rate": 5.204398545823923e-07, + "loss": 0.747, + "step": 7792 + }, + { + "epoch": 1.62, + "learning_rate": 5.198864322810636e-07, + "loss": 0.8322, + "step": 7793 + }, + { + "epoch": 1.62, + "learning_rate": 5.193332764677608e-07, + "loss": 0.7585, + "step": 7794 + }, + { + "epoch": 1.62, + "learning_rate": 5.187803872019202e-07, + "loss": 0.7714, + "step": 7795 + }, + { + "epoch": 1.62, + "learning_rate": 5.18227764542951e-07, + "loss": 1.0007, + "step": 7796 + }, + { + "epoch": 1.62, + "learning_rate": 5.176754085502305e-07, + "loss": 0.9184, + "step": 7797 + }, + { + "epoch": 1.62, + "learning_rate": 5.171233192831102e-07, + "loss": 0.855, + "step": 7798 + }, + { + "epoch": 1.62, + "learning_rate": 5.165714968009121e-07, + "loss": 0.8483, + "step": 7799 + }, + { + "epoch": 1.62, + "learning_rate": 5.160199411629301e-07, + "loss": 0.7193, + "step": 7800 + }, + { + "epoch": 1.62, + "learning_rate": 5.154686524284277e-07, + "loss": 1.0693, + "step": 7801 + }, + { + "epoch": 1.62, + "learning_rate": 5.149176306566408e-07, + "loss": 1.0563, + "step": 7802 + }, + { + "epoch": 1.62, + "learning_rate": 5.143668759067777e-07, + "loss": 0.9073, + "step": 7803 + }, + { + "epoch": 1.62, + "learning_rate": 5.13816388238017e-07, + "loss": 0.9395, + "step": 7804 + }, + { + "epoch": 1.62, + "learning_rate": 5.13266167709507e-07, + "loss": 0.8265, + "step": 7805 + }, + { + "epoch": 1.62, + "learning_rate": 5.127162143803702e-07, + "loss": 0.8431, + "step": 7806 + }, + { + "epoch": 1.62, + "learning_rate": 5.121665283096985e-07, + "loss": 0.8535, + "step": 7807 + }, + { + "epoch": 1.62, + "learning_rate": 5.116171095565565e-07, + "loss": 0.8402, + "step": 7808 + }, + { + "epoch": 1.62, + "learning_rate": 5.110679581799778e-07, + "loss": 0.7987, + "step": 7809 + }, + { + "epoch": 1.62, + "learning_rate": 5.105190742389694e-07, + "loss": 0.7508, + "step": 7810 + }, + { + "epoch": 1.62, + "learning_rate": 5.099704577925085e-07, + "loss": 0.873, + "step": 7811 + }, + { + "epoch": 1.62, + "learning_rate": 5.094221088995453e-07, + "loss": 0.9124, + "step": 7812 + }, + { + "epoch": 1.62, + "learning_rate": 5.08874027618998e-07, + "loss": 0.8566, + "step": 7813 + }, + { + "epoch": 1.63, + "learning_rate": 5.083262140097589e-07, + "loss": 0.9592, + "step": 7814 + }, + { + "epoch": 1.63, + "learning_rate": 5.0777866813069e-07, + "loss": 0.8614, + "step": 7815 + }, + { + "epoch": 1.63, + "learning_rate": 5.072313900406263e-07, + "loss": 0.9067, + "step": 7816 + }, + { + "epoch": 1.63, + "learning_rate": 5.06684379798371e-07, + "loss": 0.8461, + "step": 7817 + }, + { + "epoch": 1.63, + "learning_rate": 5.06137637462701e-07, + "loss": 0.7451, + "step": 7818 + }, + { + "epoch": 1.63, + "learning_rate": 5.05591163092364e-07, + "loss": 0.7938, + "step": 7819 + }, + { + "epoch": 1.63, + "learning_rate": 5.050449567460795e-07, + "loss": 1.0725, + "step": 7820 + }, + { + "epoch": 1.63, + "learning_rate": 5.044990184825354e-07, + "loss": 0.8589, + "step": 7821 + }, + { + "epoch": 1.63, + "learning_rate": 5.039533483603937e-07, + "loss": 1.0993, + "step": 7822 + }, + { + "epoch": 1.63, + "learning_rate": 5.034079464382876e-07, + "loss": 1.1152, + "step": 7823 + }, + { + "epoch": 1.63, + "learning_rate": 5.028628127748184e-07, + "loss": 1.0804, + "step": 7824 + }, + { + "epoch": 1.63, + "learning_rate": 5.02317947428562e-07, + "loss": 0.9036, + "step": 7825 + }, + { + "epoch": 1.63, + "learning_rate": 5.017733504580639e-07, + "loss": 0.983, + "step": 7826 + }, + { + "epoch": 1.63, + "learning_rate": 5.012290219218415e-07, + "loss": 0.9709, + "step": 7827 + }, + { + "epoch": 1.63, + "learning_rate": 5.006849618783819e-07, + "loss": 0.9229, + "step": 7828 + }, + { + "epoch": 1.63, + "learning_rate": 5.001411703861448e-07, + "loss": 1.0064, + "step": 7829 + }, + { + "epoch": 1.63, + "learning_rate": 4.995976475035607e-07, + "loss": 0.8803, + "step": 7830 + }, + { + "epoch": 1.63, + "learning_rate": 4.990543932890312e-07, + "loss": 0.9092, + "step": 7831 + }, + { + "epoch": 1.63, + "learning_rate": 4.985114078009283e-07, + "loss": 1.1705, + "step": 7832 + }, + { + "epoch": 1.63, + "learning_rate": 4.979686910975962e-07, + "loss": 1.1209, + "step": 7833 + }, + { + "epoch": 1.63, + "learning_rate": 4.974262432373494e-07, + "loss": 0.8647, + "step": 7834 + }, + { + "epoch": 1.63, + "learning_rate": 4.968840642784752e-07, + "loss": 0.8547, + "step": 7835 + }, + { + "epoch": 1.63, + "learning_rate": 4.963421542792289e-07, + "loss": 0.775, + "step": 7836 + }, + { + "epoch": 1.63, + "learning_rate": 4.958005132978393e-07, + "loss": 0.8283, + "step": 7837 + }, + { + "epoch": 1.63, + "learning_rate": 4.95259141392506e-07, + "loss": 0.9989, + "step": 7838 + }, + { + "epoch": 1.63, + "learning_rate": 4.947180386214001e-07, + "loss": 0.9493, + "step": 7839 + }, + { + "epoch": 1.63, + "learning_rate": 4.941772050426612e-07, + "loss": 0.7996, + "step": 7840 + }, + { + "epoch": 1.63, + "learning_rate": 4.936366407144029e-07, + "loss": 1.0084, + "step": 7841 + }, + { + "epoch": 1.63, + "learning_rate": 4.930963456947086e-07, + "loss": 1.0563, + "step": 7842 + }, + { + "epoch": 1.63, + "learning_rate": 4.925563200416338e-07, + "loss": 0.7978, + "step": 7843 + }, + { + "epoch": 1.63, + "learning_rate": 4.920165638132028e-07, + "loss": 0.9246, + "step": 7844 + }, + { + "epoch": 1.63, + "learning_rate": 4.914770770674131e-07, + "loss": 0.8063, + "step": 7845 + }, + { + "epoch": 1.63, + "learning_rate": 4.909378598622325e-07, + "loss": 0.9705, + "step": 7846 + }, + { + "epoch": 1.63, + "learning_rate": 4.903989122556004e-07, + "loss": 0.9802, + "step": 7847 + }, + { + "epoch": 1.63, + "learning_rate": 4.898602343054255e-07, + "loss": 0.7477, + "step": 7848 + }, + { + "epoch": 1.63, + "learning_rate": 4.893218260695894e-07, + "loss": 1.0556, + "step": 7849 + }, + { + "epoch": 1.63, + "learning_rate": 4.887836876059436e-07, + "loss": 0.8678, + "step": 7850 + }, + { + "epoch": 1.63, + "learning_rate": 4.882458189723124e-07, + "loss": 0.9698, + "step": 7851 + }, + { + "epoch": 1.63, + "learning_rate": 4.877082202264878e-07, + "loss": 0.8349, + "step": 7852 + }, + { + "epoch": 1.63, + "learning_rate": 4.871708914262356e-07, + "loss": 0.9338, + "step": 7853 + }, + { + "epoch": 1.63, + "learning_rate": 4.866338326292922e-07, + "loss": 0.9724, + "step": 7854 + }, + { + "epoch": 1.63, + "learning_rate": 4.860970438933647e-07, + "loss": 0.7446, + "step": 7855 + }, + { + "epoch": 1.63, + "learning_rate": 4.855605252761295e-07, + "loss": 0.7485, + "step": 7856 + }, + { + "epoch": 1.63, + "learning_rate": 4.850242768352363e-07, + "loss": 1.1415, + "step": 7857 + }, + { + "epoch": 1.63, + "learning_rate": 4.844882986283053e-07, + "loss": 0.8878, + "step": 7858 + }, + { + "epoch": 1.63, + "learning_rate": 4.839525907129282e-07, + "loss": 0.752, + "step": 7859 + }, + { + "epoch": 1.63, + "learning_rate": 4.834171531466645e-07, + "loss": 0.8947, + "step": 7860 + }, + { + "epoch": 1.63, + "learning_rate": 4.828819859870482e-07, + "loss": 0.7646, + "step": 7861 + }, + { + "epoch": 1.64, + "learning_rate": 4.823470892915829e-07, + "loss": 1.107, + "step": 7862 + }, + { + "epoch": 1.64, + "learning_rate": 4.81812463117744e-07, + "loss": 1.018, + "step": 7863 + }, + { + "epoch": 1.64, + "learning_rate": 4.812781075229754e-07, + "loss": 0.9532, + "step": 7864 + }, + { + "epoch": 1.64, + "learning_rate": 4.807440225646947e-07, + "loss": 0.9927, + "step": 7865 + }, + { + "epoch": 1.64, + "learning_rate": 4.802102083002888e-07, + "loss": 1.074, + "step": 7866 + }, + { + "epoch": 1.64, + "learning_rate": 4.796766647871168e-07, + "loss": 0.8229, + "step": 7867 + }, + { + "epoch": 1.64, + "learning_rate": 4.79143392082507e-07, + "loss": 0.9011, + "step": 7868 + }, + { + "epoch": 1.64, + "learning_rate": 4.786103902437593e-07, + "loss": 0.8655, + "step": 7869 + }, + { + "epoch": 1.64, + "learning_rate": 4.780776593281455e-07, + "loss": 0.974, + "step": 7870 + }, + { + "epoch": 1.64, + "learning_rate": 4.775451993929082e-07, + "loss": 0.8319, + "step": 7871 + }, + { + "epoch": 1.64, + "learning_rate": 4.770130104952583e-07, + "loss": 0.8254, + "step": 7872 + }, + { + "epoch": 1.64, + "learning_rate": 4.764810926923803e-07, + "loss": 1.0475, + "step": 7873 + }, + { + "epoch": 1.64, + "learning_rate": 4.759494460414289e-07, + "loss": 1.0635, + "step": 7874 + }, + { + "epoch": 1.64, + "learning_rate": 4.7541807059953e-07, + "loss": 1.1328, + "step": 7875 + }, + { + "epoch": 1.64, + "learning_rate": 4.7488696642377873e-07, + "loss": 0.9103, + "step": 7876 + }, + { + "epoch": 1.64, + "learning_rate": 4.7435613357124264e-07, + "loss": 0.9521, + "step": 7877 + }, + { + "epoch": 1.64, + "learning_rate": 4.7382557209895985e-07, + "loss": 1.0136, + "step": 7878 + }, + { + "epoch": 1.64, + "learning_rate": 4.732952820639397e-07, + "loss": 0.7237, + "step": 7879 + }, + { + "epoch": 1.64, + "learning_rate": 4.727652635231606e-07, + "loss": 1.0052, + "step": 7880 + }, + { + "epoch": 1.64, + "learning_rate": 4.7223551653357346e-07, + "loss": 0.8823, + "step": 7881 + }, + { + "epoch": 1.64, + "learning_rate": 4.717060411520999e-07, + "loss": 0.8817, + "step": 7882 + }, + { + "epoch": 1.64, + "learning_rate": 4.7117683743563225e-07, + "loss": 0.8353, + "step": 7883 + }, + { + "epoch": 1.64, + "learning_rate": 4.7064790544103255e-07, + "loss": 0.9258, + "step": 7884 + }, + { + "epoch": 1.64, + "learning_rate": 4.7011924522513473e-07, + "loss": 0.9198, + "step": 7885 + }, + { + "epoch": 1.64, + "learning_rate": 4.6959085684474336e-07, + "loss": 0.9288, + "step": 7886 + }, + { + "epoch": 1.64, + "learning_rate": 4.690627403566347e-07, + "loss": 0.858, + "step": 7887 + }, + { + "epoch": 1.64, + "learning_rate": 4.685348958175526e-07, + "loss": 0.7908, + "step": 7888 + }, + { + "epoch": 1.64, + "learning_rate": 4.680073232842168e-07, + "loss": 0.974, + "step": 7889 + }, + { + "epoch": 1.64, + "learning_rate": 4.674800228133126e-07, + "loss": 0.8138, + "step": 7890 + }, + { + "epoch": 1.64, + "learning_rate": 4.6695299446149996e-07, + "loss": 1.1245, + "step": 7891 + }, + { + "epoch": 1.64, + "learning_rate": 4.664262382854057e-07, + "loss": 0.9561, + "step": 7892 + }, + { + "epoch": 1.64, + "learning_rate": 4.65899754341633e-07, + "loss": 0.8453, + "step": 7893 + }, + { + "epoch": 1.64, + "learning_rate": 4.6537354268675015e-07, + "loss": 0.8212, + "step": 7894 + }, + { + "epoch": 1.64, + "learning_rate": 4.6484760337729984e-07, + "loss": 1.1104, + "step": 7895 + }, + { + "epoch": 1.64, + "learning_rate": 4.643219364697925e-07, + "loss": 0.86, + "step": 7896 + }, + { + "epoch": 1.64, + "learning_rate": 4.637965420207133e-07, + "loss": 0.8827, + "step": 7897 + }, + { + "epoch": 1.64, + "learning_rate": 4.6327142008651415e-07, + "loss": 0.8113, + "step": 7898 + }, + { + "epoch": 1.64, + "learning_rate": 4.627465707236205e-07, + "loss": 0.8522, + "step": 7899 + }, + { + "epoch": 1.64, + "learning_rate": 4.6222199398842524e-07, + "loss": 0.976, + "step": 7900 + }, + { + "epoch": 1.64, + "learning_rate": 4.61697689937297e-07, + "loss": 1.1883, + "step": 7901 + }, + { + "epoch": 1.64, + "learning_rate": 4.6117365862657036e-07, + "loss": 1.0324, + "step": 7902 + }, + { + "epoch": 1.64, + "learning_rate": 4.6064990011255357e-07, + "loss": 0.6996, + "step": 7903 + }, + { + "epoch": 1.64, + "learning_rate": 4.601264144515224e-07, + "loss": 1.0644, + "step": 7904 + }, + { + "epoch": 1.64, + "learning_rate": 4.5960320169972834e-07, + "loss": 0.8483, + "step": 7905 + }, + { + "epoch": 1.64, + "learning_rate": 4.590802619133879e-07, + "loss": 1.3283, + "step": 7906 + }, + { + "epoch": 1.64, + "learning_rate": 4.585575951486931e-07, + "loss": 0.9576, + "step": 7907 + }, + { + "epoch": 1.64, + "learning_rate": 4.580352014618017e-07, + "loss": 0.7944, + "step": 7908 + }, + { + "epoch": 1.64, + "learning_rate": 4.57513080908848e-07, + "loss": 0.8426, + "step": 7909 + }, + { + "epoch": 1.65, + "learning_rate": 4.569912335459316e-07, + "loss": 0.7742, + "step": 7910 + }, + { + "epoch": 1.65, + "learning_rate": 4.564696594291263e-07, + "loss": 1.0146, + "step": 7911 + }, + { + "epoch": 1.65, + "learning_rate": 4.5594835861447357e-07, + "loss": 0.926, + "step": 7912 + }, + { + "epoch": 1.65, + "learning_rate": 4.554273311579893e-07, + "loss": 1.1329, + "step": 7913 + }, + { + "epoch": 1.65, + "learning_rate": 4.5490657711565596e-07, + "loss": 0.8905, + "step": 7914 + }, + { + "epoch": 1.65, + "learning_rate": 4.543860965434299e-07, + "loss": 0.9037, + "step": 7915 + }, + { + "epoch": 1.65, + "learning_rate": 4.5386588949723496e-07, + "loss": 0.7112, + "step": 7916 + }, + { + "epoch": 1.65, + "learning_rate": 4.533459560329698e-07, + "loss": 0.7502, + "step": 7917 + }, + { + "epoch": 1.65, + "learning_rate": 4.528262962064991e-07, + "loss": 1.1333, + "step": 7918 + }, + { + "epoch": 1.65, + "learning_rate": 4.523069100736621e-07, + "loss": 0.8818, + "step": 7919 + }, + { + "epoch": 1.65, + "learning_rate": 4.5178779769026425e-07, + "loss": 0.9118, + "step": 7920 + }, + { + "epoch": 1.65, + "learning_rate": 4.512689591120872e-07, + "loss": 0.941, + "step": 7921 + }, + { + "epoch": 1.65, + "learning_rate": 4.5075039439487774e-07, + "loss": 1.0004, + "step": 7922 + }, + { + "epoch": 1.65, + "learning_rate": 4.502321035943573e-07, + "loss": 0.8705, + "step": 7923 + }, + { + "epoch": 1.65, + "learning_rate": 4.4971408676621395e-07, + "loss": 0.9889, + "step": 7924 + }, + { + "epoch": 1.65, + "learning_rate": 4.4919634396611153e-07, + "loss": 0.9638, + "step": 7925 + }, + { + "epoch": 1.65, + "learning_rate": 4.4867887524967923e-07, + "loss": 0.7883, + "step": 7926 + }, + { + "epoch": 1.65, + "learning_rate": 4.4816168067252014e-07, + "loss": 1.0417, + "step": 7927 + }, + { + "epoch": 1.65, + "learning_rate": 4.476447602902053e-07, + "loss": 0.792, + "step": 7928 + }, + { + "epoch": 1.65, + "learning_rate": 4.471281141582798e-07, + "loss": 0.8504, + "step": 7929 + }, + { + "epoch": 1.65, + "learning_rate": 4.46611742332256e-07, + "loss": 0.797, + "step": 7930 + }, + { + "epoch": 1.65, + "learning_rate": 4.460956448676188e-07, + "loss": 1.1923, + "step": 7931 + }, + { + "epoch": 1.65, + "learning_rate": 4.4557982181982083e-07, + "loss": 1.102, + "step": 7932 + }, + { + "epoch": 1.65, + "learning_rate": 4.450642732442904e-07, + "loss": 0.7777, + "step": 7933 + }, + { + "epoch": 1.65, + "learning_rate": 4.4454899919642066e-07, + "loss": 0.9094, + "step": 7934 + }, + { + "epoch": 1.65, + "learning_rate": 4.440339997315792e-07, + "loss": 1.0591, + "step": 7935 + }, + { + "epoch": 1.65, + "learning_rate": 4.435192749051008e-07, + "loss": 0.9828, + "step": 7936 + }, + { + "epoch": 1.65, + "learning_rate": 4.430048247722952e-07, + "loss": 0.8461, + "step": 7937 + }, + { + "epoch": 1.65, + "learning_rate": 4.424906493884379e-07, + "loss": 0.8665, + "step": 7938 + }, + { + "epoch": 1.65, + "learning_rate": 4.4197674880877893e-07, + "loss": 1.0419, + "step": 7939 + }, + { + "epoch": 1.65, + "learning_rate": 4.414631230885339e-07, + "loss": 0.9791, + "step": 7940 + }, + { + "epoch": 1.65, + "learning_rate": 4.4094977228289525e-07, + "loss": 0.8655, + "step": 7941 + }, + { + "epoch": 1.65, + "learning_rate": 4.4043669644702055e-07, + "loss": 0.7628, + "step": 7942 + }, + { + "epoch": 1.65, + "learning_rate": 4.399238956360407e-07, + "loss": 0.9215, + "step": 7943 + }, + { + "epoch": 1.65, + "learning_rate": 4.3941136990505405e-07, + "loss": 0.9428, + "step": 7944 + }, + { + "epoch": 1.65, + "learning_rate": 4.388991193091344e-07, + "loss": 0.778, + "step": 7945 + }, + { + "epoch": 1.65, + "learning_rate": 4.3838714390332115e-07, + "loss": 0.9111, + "step": 7946 + }, + { + "epoch": 1.65, + "learning_rate": 4.378754437426269e-07, + "loss": 0.7701, + "step": 7947 + }, + { + "epoch": 1.65, + "learning_rate": 4.3736401888203204e-07, + "loss": 1.0501, + "step": 7948 + }, + { + "epoch": 1.65, + "learning_rate": 4.3685286937649225e-07, + "loss": 0.8272, + "step": 7949 + }, + { + "epoch": 1.65, + "learning_rate": 4.363419952809277e-07, + "loss": 0.8043, + "step": 7950 + }, + { + "epoch": 1.65, + "learning_rate": 4.358313966502336e-07, + "loss": 0.8903, + "step": 7951 + }, + { + "epoch": 1.65, + "learning_rate": 4.3532107353927166e-07, + "loss": 0.7699, + "step": 7952 + }, + { + "epoch": 1.65, + "learning_rate": 4.3481102600287846e-07, + "loss": 0.8218, + "step": 7953 + }, + { + "epoch": 1.65, + "learning_rate": 4.34301254095857e-07, + "loss": 0.7829, + "step": 7954 + }, + { + "epoch": 1.65, + "learning_rate": 4.3379175787298264e-07, + "loss": 0.899, + "step": 7955 + }, + { + "epoch": 1.65, + "learning_rate": 4.332825373890007e-07, + "loss": 0.9409, + "step": 7956 + }, + { + "epoch": 1.65, + "learning_rate": 4.3277359269862773e-07, + "loss": 0.7571, + "step": 7957 + }, + { + "epoch": 1.66, + "learning_rate": 4.3226492385654857e-07, + "loss": 0.8899, + "step": 7958 + }, + { + "epoch": 1.66, + "learning_rate": 4.3175653091741965e-07, + "loss": 1.0018, + "step": 7959 + }, + { + "epoch": 1.66, + "learning_rate": 4.3124841393586845e-07, + "loss": 0.8903, + "step": 7960 + }, + { + "epoch": 1.66, + "learning_rate": 4.307405729664927e-07, + "loss": 0.8082, + "step": 7961 + }, + { + "epoch": 1.66, + "learning_rate": 4.3023300806385815e-07, + "loss": 0.7627, + "step": 7962 + }, + { + "epoch": 1.66, + "learning_rate": 4.297257192825037e-07, + "loss": 1.1314, + "step": 7963 + }, + { + "epoch": 1.66, + "learning_rate": 4.292187066769372e-07, + "loss": 0.7789, + "step": 7964 + }, + { + "epoch": 1.66, + "learning_rate": 4.28711970301638e-07, + "loss": 0.8608, + "step": 7965 + }, + { + "epoch": 1.66, + "learning_rate": 4.2820551021105325e-07, + "loss": 0.8686, + "step": 7966 + }, + { + "epoch": 1.66, + "learning_rate": 4.276993264596031e-07, + "loss": 0.9389, + "step": 7967 + }, + { + "epoch": 1.66, + "learning_rate": 4.2719341910167663e-07, + "loss": 0.9042, + "step": 7968 + }, + { + "epoch": 1.66, + "learning_rate": 4.266877881916341e-07, + "loss": 0.7352, + "step": 7969 + }, + { + "epoch": 1.66, + "learning_rate": 4.2618243378380475e-07, + "loss": 1.2006, + "step": 7970 + }, + { + "epoch": 1.66, + "learning_rate": 4.2567735593248933e-07, + "loss": 0.9778, + "step": 7971 + }, + { + "epoch": 1.66, + "learning_rate": 4.2517255469195825e-07, + "loss": 0.8841, + "step": 7972 + }, + { + "epoch": 1.66, + "learning_rate": 4.2466803011645306e-07, + "loss": 0.8116, + "step": 7973 + }, + { + "epoch": 1.66, + "learning_rate": 4.241637822601834e-07, + "loss": 0.912, + "step": 7974 + }, + { + "epoch": 1.66, + "learning_rate": 4.2365981117733156e-07, + "loss": 0.8129, + "step": 7975 + }, + { + "epoch": 1.66, + "learning_rate": 4.231561169220496e-07, + "loss": 0.8204, + "step": 7976 + }, + { + "epoch": 1.66, + "learning_rate": 4.226526995484592e-07, + "loss": 0.941, + "step": 7977 + }, + { + "epoch": 1.66, + "learning_rate": 4.2214955911065177e-07, + "loss": 0.8823, + "step": 7978 + }, + { + "epoch": 1.66, + "learning_rate": 4.216466956626904e-07, + "loss": 0.8758, + "step": 7979 + }, + { + "epoch": 1.66, + "learning_rate": 4.211441092586076e-07, + "loss": 0.9029, + "step": 7980 + }, + { + "epoch": 1.66, + "learning_rate": 4.206417999524067e-07, + "loss": 0.9425, + "step": 7981 + }, + { + "epoch": 1.66, + "learning_rate": 4.201397677980596e-07, + "loss": 0.8014, + "step": 7982 + }, + { + "epoch": 1.66, + "learning_rate": 4.196380128495105e-07, + "loss": 0.8771, + "step": 7983 + }, + { + "epoch": 1.66, + "learning_rate": 4.1913653516067295e-07, + "loss": 0.8513, + "step": 7984 + }, + { + "epoch": 1.66, + "learning_rate": 4.1863533478543116e-07, + "loss": 1.0987, + "step": 7985 + }, + { + "epoch": 1.66, + "learning_rate": 4.1813441177763767e-07, + "loss": 0.8571, + "step": 7986 + }, + { + "epoch": 1.66, + "learning_rate": 4.1763376619111717e-07, + "loss": 0.9496, + "step": 7987 + }, + { + "epoch": 1.66, + "learning_rate": 4.1713339807966453e-07, + "loss": 0.7818, + "step": 7988 + }, + { + "epoch": 1.66, + "learning_rate": 4.166333074970445e-07, + "loss": 1.0026, + "step": 7989 + }, + { + "epoch": 1.66, + "learning_rate": 4.161334944969908e-07, + "loss": 0.8878, + "step": 7990 + }, + { + "epoch": 1.66, + "learning_rate": 4.1563395913320836e-07, + "loss": 0.9175, + "step": 7991 + }, + { + "epoch": 1.66, + "learning_rate": 4.1513470145937296e-07, + "loss": 1.0157, + "step": 7992 + }, + { + "epoch": 1.66, + "learning_rate": 4.1463572152913e-07, + "loss": 0.733, + "step": 7993 + }, + { + "epoch": 1.66, + "learning_rate": 4.1413701939609327e-07, + "loss": 0.6826, + "step": 7994 + }, + { + "epoch": 1.66, + "learning_rate": 4.136385951138496e-07, + "loss": 0.9611, + "step": 7995 + }, + { + "epoch": 1.66, + "learning_rate": 4.1314044873595447e-07, + "loss": 0.7076, + "step": 7996 + }, + { + "epoch": 1.66, + "learning_rate": 4.1264258031593403e-07, + "loss": 1.1905, + "step": 7997 + }, + { + "epoch": 1.66, + "learning_rate": 4.121449899072829e-07, + "loss": 0.8855, + "step": 7998 + }, + { + "epoch": 1.66, + "learning_rate": 4.1164767756346786e-07, + "loss": 1.0137, + "step": 7999 + }, + { + "epoch": 1.66, + "learning_rate": 4.111506433379256e-07, + "loss": 1.059, + "step": 8000 + }, + { + "epoch": 1.66, + "eval_loss": NaN, + "eval_runtime": 15.0341, + "eval_samples_per_second": 352.2, + "eval_steps_per_second": 44.033, + "step": 8000 + }, + { + "epoch": 1.66, + "learning_rate": 4.106538872840624e-07, + "loss": 0.9999, + "step": 8001 + }, + { + "epoch": 1.66, + "learning_rate": 4.1015740945525346e-07, + "loss": 0.9226, + "step": 8002 + }, + { + "epoch": 1.66, + "learning_rate": 4.0966120990484625e-07, + "loss": 0.9423, + "step": 8003 + }, + { + "epoch": 1.66, + "learning_rate": 4.09165288686157e-07, + "loss": 0.8778, + "step": 8004 + }, + { + "epoch": 1.66, + "learning_rate": 4.0866964585247344e-07, + "loss": 1.1924, + "step": 8005 + }, + { + "epoch": 1.67, + "learning_rate": 4.0817428145705104e-07, + "loss": 0.9885, + "step": 8006 + }, + { + "epoch": 1.67, + "learning_rate": 4.0767919555311686e-07, + "loss": 0.8103, + "step": 8007 + }, + { + "epoch": 1.67, + "learning_rate": 4.0718438819386795e-07, + "loss": 1.0332, + "step": 8008 + }, + { + "epoch": 1.67, + "learning_rate": 4.066898594324725e-07, + "loss": 0.9772, + "step": 8009 + }, + { + "epoch": 1.67, + "learning_rate": 4.06195609322066e-07, + "loss": 1.1248, + "step": 8010 + }, + { + "epoch": 1.67, + "learning_rate": 4.05701637915756e-07, + "loss": 0.9158, + "step": 8011 + }, + { + "epoch": 1.67, + "learning_rate": 4.0520794526661995e-07, + "loss": 0.8789, + "step": 8012 + }, + { + "epoch": 1.67, + "learning_rate": 4.0471453142770553e-07, + "loss": 0.8423, + "step": 8013 + }, + { + "epoch": 1.67, + "learning_rate": 4.042213964520289e-07, + "loss": 1.0217, + "step": 8014 + }, + { + "epoch": 1.67, + "learning_rate": 4.037285403925781e-07, + "loss": 1.0847, + "step": 8015 + }, + { + "epoch": 1.67, + "learning_rate": 4.0323596330231037e-07, + "loss": 1.0449, + "step": 8016 + }, + { + "epoch": 1.67, + "learning_rate": 4.0274366523415376e-07, + "loss": 0.7196, + "step": 8017 + }, + { + "epoch": 1.67, + "learning_rate": 4.022516462410042e-07, + "loss": 0.9101, + "step": 8018 + }, + { + "epoch": 1.67, + "learning_rate": 4.017599063757301e-07, + "loss": 0.9085, + "step": 8019 + }, + { + "epoch": 1.67, + "learning_rate": 4.0126844569116826e-07, + "loss": 0.9099, + "step": 8020 + }, + { + "epoch": 1.67, + "learning_rate": 4.0077726424012775e-07, + "loss": 0.9464, + "step": 8021 + }, + { + "epoch": 1.67, + "learning_rate": 4.0028636207538295e-07, + "loss": 0.9051, + "step": 8022 + }, + { + "epoch": 1.67, + "learning_rate": 3.9979573924968437e-07, + "loss": 1.0067, + "step": 8023 + }, + { + "epoch": 1.67, + "learning_rate": 3.9930539581574754e-07, + "loss": 0.9715, + "step": 8024 + }, + { + "epoch": 1.67, + "learning_rate": 3.9881533182626097e-07, + "loss": 1.0535, + "step": 8025 + }, + { + "epoch": 1.67, + "learning_rate": 3.9832554733387983e-07, + "loss": 1.0969, + "step": 8026 + }, + { + "epoch": 1.67, + "learning_rate": 3.9783604239123473e-07, + "loss": 0.6979, + "step": 8027 + }, + { + "epoch": 1.67, + "learning_rate": 3.973468170509202e-07, + "loss": 0.8041, + "step": 8028 + }, + { + "epoch": 1.67, + "learning_rate": 3.9685787136550533e-07, + "loss": 0.9548, + "step": 8029 + }, + { + "epoch": 1.67, + "learning_rate": 3.9636920538752495e-07, + "loss": 1.0363, + "step": 8030 + }, + { + "epoch": 1.67, + "learning_rate": 3.9588081916948883e-07, + "loss": 0.9346, + "step": 8031 + }, + { + "epoch": 1.67, + "learning_rate": 3.9539271276387256e-07, + "loss": 0.9631, + "step": 8032 + }, + { + "epoch": 1.67, + "learning_rate": 3.9490488622312395e-07, + "loss": 0.7549, + "step": 8033 + }, + { + "epoch": 1.67, + "learning_rate": 3.9441733959965797e-07, + "loss": 0.7081, + "step": 8034 + }, + { + "epoch": 1.67, + "learning_rate": 3.9393007294586446e-07, + "loss": 1.0971, + "step": 8035 + }, + { + "epoch": 1.67, + "learning_rate": 3.9344308631409785e-07, + "loss": 0.885, + "step": 8036 + }, + { + "epoch": 1.67, + "learning_rate": 3.929563797566863e-07, + "loss": 0.9741, + "step": 8037 + }, + { + "epoch": 1.67, + "learning_rate": 3.9246995332592463e-07, + "loss": 1.0041, + "step": 8038 + }, + { + "epoch": 1.67, + "learning_rate": 3.9198380707408133e-07, + "loss": 0.7751, + "step": 8039 + }, + { + "epoch": 1.67, + "learning_rate": 3.914979410533913e-07, + "loss": 0.7968, + "step": 8040 + }, + { + "epoch": 1.67, + "learning_rate": 3.9101235531606216e-07, + "loss": 0.7517, + "step": 8041 + }, + { + "epoch": 1.67, + "learning_rate": 3.905270499142678e-07, + "loss": 1.0712, + "step": 8042 + }, + { + "epoch": 1.67, + "learning_rate": 3.9004202490015716e-07, + "loss": 0.7683, + "step": 8043 + }, + { + "epoch": 1.67, + "learning_rate": 3.895572803258439e-07, + "loss": 0.7281, + "step": 8044 + }, + { + "epoch": 1.67, + "learning_rate": 3.890728162434154e-07, + "loss": 0.8172, + "step": 8045 + }, + { + "epoch": 1.67, + "learning_rate": 3.885886327049253e-07, + "loss": 0.8298, + "step": 8046 + }, + { + "epoch": 1.67, + "learning_rate": 3.881047297624013e-07, + "loss": 0.8632, + "step": 8047 + }, + { + "epoch": 1.67, + "learning_rate": 3.8762110746783717e-07, + "loss": 1.0661, + "step": 8048 + }, + { + "epoch": 1.67, + "learning_rate": 3.871377658731994e-07, + "loss": 0.8849, + "step": 8049 + }, + { + "epoch": 1.67, + "learning_rate": 3.8665470503042066e-07, + "loss": 0.7799, + "step": 8050 + }, + { + "epoch": 1.67, + "learning_rate": 3.8617192499140883e-07, + "loss": 0.6922, + "step": 8051 + }, + { + "epoch": 1.67, + "learning_rate": 3.8568942580803644e-07, + "loss": 1.1892, + "step": 8052 + }, + { + "epoch": 1.67, + "learning_rate": 3.8520720753214964e-07, + "loss": 0.9505, + "step": 8053 + }, + { + "epoch": 1.68, + "learning_rate": 3.8472527021556e-07, + "loss": 0.7492, + "step": 8054 + }, + { + "epoch": 1.68, + "learning_rate": 3.8424361391005515e-07, + "loss": 0.9206, + "step": 8055 + }, + { + "epoch": 1.68, + "learning_rate": 3.837622386673866e-07, + "loss": 0.8665, + "step": 8056 + }, + { + "epoch": 1.68, + "learning_rate": 3.832811445392793e-07, + "loss": 0.9514, + "step": 8057 + }, + { + "epoch": 1.68, + "learning_rate": 3.82800331577425e-07, + "loss": 0.8602, + "step": 8058 + }, + { + "epoch": 1.68, + "learning_rate": 3.8231979983348996e-07, + "loss": 1.0227, + "step": 8059 + }, + { + "epoch": 1.68, + "learning_rate": 3.818395493591046e-07, + "loss": 0.9757, + "step": 8060 + }, + { + "epoch": 1.68, + "learning_rate": 3.8135958020587356e-07, + "loss": 0.7419, + "step": 8061 + }, + { + "epoch": 1.68, + "learning_rate": 3.808798924253677e-07, + "loss": 0.9683, + "step": 8062 + }, + { + "epoch": 1.68, + "learning_rate": 3.8040048606913134e-07, + "loss": 0.7708, + "step": 8063 + }, + { + "epoch": 1.68, + "learning_rate": 3.7992136118867573e-07, + "loss": 0.874, + "step": 8064 + }, + { + "epoch": 1.68, + "learning_rate": 3.794425178354832e-07, + "loss": 0.8225, + "step": 8065 + }, + { + "epoch": 1.68, + "learning_rate": 3.7896395606100374e-07, + "loss": 0.9948, + "step": 8066 + }, + { + "epoch": 1.68, + "learning_rate": 3.7848567591666184e-07, + "loss": 0.7983, + "step": 8067 + }, + { + "epoch": 1.68, + "learning_rate": 3.78007677453846e-07, + "loss": 0.8968, + "step": 8068 + }, + { + "epoch": 1.68, + "learning_rate": 3.7752996072391886e-07, + "loss": 0.7875, + "step": 8069 + }, + { + "epoch": 1.68, + "learning_rate": 3.7705252577820904e-07, + "loss": 0.7836, + "step": 8070 + }, + { + "epoch": 1.68, + "learning_rate": 3.7657537266801946e-07, + "loss": 1.0832, + "step": 8071 + }, + { + "epoch": 1.68, + "learning_rate": 3.7609850144461845e-07, + "loss": 0.8907, + "step": 8072 + }, + { + "epoch": 1.68, + "learning_rate": 3.7562191215924667e-07, + "loss": 0.67, + "step": 8073 + }, + { + "epoch": 1.68, + "learning_rate": 3.7514560486311155e-07, + "loss": 0.888, + "step": 8074 + }, + { + "epoch": 1.68, + "learning_rate": 3.746695796073958e-07, + "loss": 0.7664, + "step": 8075 + }, + { + "epoch": 1.68, + "learning_rate": 3.741938364432451e-07, + "loss": 1.0003, + "step": 8076 + }, + { + "epoch": 1.68, + "learning_rate": 3.7371837542178034e-07, + "loss": 0.6665, + "step": 8077 + }, + { + "epoch": 1.68, + "learning_rate": 3.7324319659408755e-07, + "loss": 0.9255, + "step": 8078 + }, + { + "epoch": 1.68, + "learning_rate": 3.72768300011227e-07, + "loss": 0.8078, + "step": 8079 + }, + { + "epoch": 1.68, + "learning_rate": 3.722936857242244e-07, + "loss": 0.7932, + "step": 8080 + }, + { + "epoch": 1.68, + "learning_rate": 3.718193537840784e-07, + "loss": 0.9053, + "step": 8081 + }, + { + "epoch": 1.68, + "learning_rate": 3.7134530424175386e-07, + "loss": 1.0169, + "step": 8082 + }, + { + "epoch": 1.68, + "learning_rate": 3.708715371481903e-07, + "loss": 0.8263, + "step": 8083 + }, + { + "epoch": 1.68, + "learning_rate": 3.703980525542918e-07, + "loss": 0.7992, + "step": 8084 + }, + { + "epoch": 1.68, + "learning_rate": 3.699248505109356e-07, + "loss": 1.0458, + "step": 8085 + }, + { + "epoch": 1.68, + "learning_rate": 3.6945193106896535e-07, + "loss": 0.8447, + "step": 8086 + }, + { + "epoch": 1.68, + "learning_rate": 3.689792942791984e-07, + "loss": 1.0788, + "step": 8087 + }, + { + "epoch": 1.68, + "learning_rate": 3.6850694019241817e-07, + "loss": 0.8415, + "step": 8088 + }, + { + "epoch": 1.68, + "learning_rate": 3.68034868859379e-07, + "loss": 0.9973, + "step": 8089 + }, + { + "epoch": 1.68, + "learning_rate": 3.6756308033080633e-07, + "loss": 0.7332, + "step": 8090 + }, + { + "epoch": 1.68, + "learning_rate": 3.670915746573923e-07, + "loss": 0.7902, + "step": 8091 + }, + { + "epoch": 1.68, + "learning_rate": 3.6662035188980037e-07, + "loss": 0.7117, + "step": 8092 + }, + { + "epoch": 1.68, + "learning_rate": 3.66149412078664e-07, + "loss": 0.9244, + "step": 8093 + }, + { + "epoch": 1.68, + "learning_rate": 3.656787552745858e-07, + "loss": 0.918, + "step": 8094 + }, + { + "epoch": 1.68, + "learning_rate": 3.65208381528137e-07, + "loss": 0.9612, + "step": 8095 + }, + { + "epoch": 1.68, + "learning_rate": 3.647382908898593e-07, + "loss": 0.9057, + "step": 8096 + }, + { + "epoch": 1.68, + "learning_rate": 3.642684834102642e-07, + "loss": 1.0122, + "step": 8097 + }, + { + "epoch": 1.68, + "learning_rate": 3.6379895913983317e-07, + "loss": 0.789, + "step": 8098 + }, + { + "epoch": 1.68, + "learning_rate": 3.633297181290153e-07, + "loss": 0.8546, + "step": 8099 + }, + { + "epoch": 1.68, + "learning_rate": 3.628607604282311e-07, + "loss": 0.8189, + "step": 8100 + }, + { + "epoch": 1.68, + "learning_rate": 3.623920860878701e-07, + "loss": 0.8409, + "step": 8101 + }, + { + "epoch": 1.69, + "learning_rate": 3.619236951582916e-07, + "loss": 1.1355, + "step": 8102 + }, + { + "epoch": 1.69, + "learning_rate": 3.6145558768982336e-07, + "loss": 0.9093, + "step": 8103 + }, + { + "epoch": 1.69, + "learning_rate": 3.6098776373276375e-07, + "loss": 0.8066, + "step": 8104 + }, + { + "epoch": 1.69, + "learning_rate": 3.6052022333738065e-07, + "loss": 0.8624, + "step": 8105 + }, + { + "epoch": 1.69, + "learning_rate": 3.600529665539122e-07, + "loss": 0.9899, + "step": 8106 + }, + { + "epoch": 1.69, + "learning_rate": 3.5958599343256313e-07, + "loss": 1.1209, + "step": 8107 + }, + { + "epoch": 1.69, + "learning_rate": 3.591193040235108e-07, + "loss": 0.751, + "step": 8108 + }, + { + "epoch": 1.69, + "learning_rate": 3.5865289837690063e-07, + "loss": 0.9349, + "step": 8109 + }, + { + "epoch": 1.69, + "learning_rate": 3.581867765428486e-07, + "loss": 0.754, + "step": 8110 + }, + { + "epoch": 1.69, + "learning_rate": 3.577209385714386e-07, + "loss": 1.0161, + "step": 8111 + }, + { + "epoch": 1.69, + "learning_rate": 3.5725538451272497e-07, + "loss": 0.8187, + "step": 8112 + }, + { + "epoch": 1.69, + "learning_rate": 3.567901144167317e-07, + "loss": 0.948, + "step": 8113 + }, + { + "epoch": 1.69, + "learning_rate": 3.5632512833345275e-07, + "loss": 0.8115, + "step": 8114 + }, + { + "epoch": 1.69, + "learning_rate": 3.558604263128495e-07, + "loss": 0.8064, + "step": 8115 + }, + { + "epoch": 1.69, + "learning_rate": 3.5539600840485474e-07, + "loss": 0.9677, + "step": 8116 + }, + { + "epoch": 1.69, + "learning_rate": 3.5493187465937014e-07, + "loss": 0.977, + "step": 8117 + }, + { + "epoch": 1.69, + "learning_rate": 3.544680251262675e-07, + "loss": 0.9543, + "step": 8118 + }, + { + "epoch": 1.69, + "learning_rate": 3.54004459855386e-07, + "loss": 0.6864, + "step": 8119 + }, + { + "epoch": 1.69, + "learning_rate": 3.535411788965367e-07, + "loss": 1.0175, + "step": 8120 + }, + { + "epoch": 1.69, + "learning_rate": 3.5307818229949884e-07, + "loss": 0.9304, + "step": 8121 + }, + { + "epoch": 1.69, + "learning_rate": 3.5261547011402193e-07, + "loss": 0.9371, + "step": 8122 + }, + { + "epoch": 1.69, + "learning_rate": 3.5215304238982326e-07, + "loss": 0.8502, + "step": 8123 + }, + { + "epoch": 1.69, + "learning_rate": 3.5169089917659136e-07, + "loss": 0.8415, + "step": 8124 + }, + { + "epoch": 1.69, + "learning_rate": 3.5122904052398317e-07, + "loss": 1.061, + "step": 8125 + }, + { + "epoch": 1.69, + "learning_rate": 3.507674664816264e-07, + "loss": 0.7724, + "step": 8126 + }, + { + "epoch": 1.69, + "learning_rate": 3.5030617709911563e-07, + "loss": 1.2068, + "step": 8127 + }, + { + "epoch": 1.69, + "learning_rate": 3.498451724260169e-07, + "loss": 0.8585, + "step": 8128 + }, + { + "epoch": 1.69, + "learning_rate": 3.493844525118652e-07, + "loss": 0.8791, + "step": 8129 + }, + { + "epoch": 1.69, + "learning_rate": 3.4892401740616574e-07, + "loss": 1.2181, + "step": 8130 + }, + { + "epoch": 1.69, + "learning_rate": 3.4846386715839086e-07, + "loss": 1.2071, + "step": 8131 + }, + { + "epoch": 1.69, + "learning_rate": 3.480040018179843e-07, + "loss": 1.1786, + "step": 8132 + }, + { + "epoch": 1.69, + "learning_rate": 3.475444214343583e-07, + "loss": 0.8151, + "step": 8133 + }, + { + "epoch": 1.69, + "learning_rate": 3.470851260568959e-07, + "loss": 0.9152, + "step": 8134 + }, + { + "epoch": 1.69, + "learning_rate": 3.466261157349464e-07, + "loss": 1.0187, + "step": 8135 + }, + { + "epoch": 1.69, + "learning_rate": 3.461673905178316e-07, + "loss": 0.8563, + "step": 8136 + }, + { + "epoch": 1.69, + "learning_rate": 3.457089504548412e-07, + "loss": 0.976, + "step": 8137 + }, + { + "epoch": 1.69, + "learning_rate": 3.4525079559523533e-07, + "loss": 0.7005, + "step": 8138 + }, + { + "epoch": 1.69, + "learning_rate": 3.447929259882416e-07, + "loss": 0.9026, + "step": 8139 + }, + { + "epoch": 1.69, + "learning_rate": 3.4433534168305813e-07, + "loss": 0.9514, + "step": 8140 + }, + { + "epoch": 1.69, + "learning_rate": 3.4387804272885313e-07, + "loss": 1.1044, + "step": 8141 + }, + { + "epoch": 1.69, + "learning_rate": 3.4342102917476303e-07, + "loss": 0.7633, + "step": 8142 + }, + { + "epoch": 1.69, + "learning_rate": 3.4296430106989354e-07, + "loss": 0.9279, + "step": 8143 + }, + { + "epoch": 1.69, + "learning_rate": 3.425078584633203e-07, + "loss": 0.7961, + "step": 8144 + }, + { + "epoch": 1.69, + "learning_rate": 3.420517014040879e-07, + "loss": 0.761, + "step": 8145 + }, + { + "epoch": 1.69, + "learning_rate": 3.415958299412111e-07, + "loss": 0.8186, + "step": 8146 + }, + { + "epoch": 1.69, + "learning_rate": 3.4114024412367193e-07, + "loss": 0.9123, + "step": 8147 + }, + { + "epoch": 1.69, + "learning_rate": 3.4068494400042404e-07, + "loss": 0.724, + "step": 8148 + }, + { + "epoch": 1.69, + "learning_rate": 3.4022992962038925e-07, + "loss": 1.0796, + "step": 8149 + }, + { + "epoch": 1.7, + "learning_rate": 3.397752010324591e-07, + "loss": 0.781, + "step": 8150 + }, + { + "epoch": 1.7, + "learning_rate": 3.3932075828549314e-07, + "loss": 0.8432, + "step": 8151 + }, + { + "epoch": 1.7, + "learning_rate": 3.3886660142832203e-07, + "loss": 0.7944, + "step": 8152 + }, + { + "epoch": 1.7, + "learning_rate": 3.3841273050974477e-07, + "loss": 0.7252, + "step": 8153 + }, + { + "epoch": 1.7, + "learning_rate": 3.379591455785299e-07, + "loss": 0.9669, + "step": 8154 + }, + { + "epoch": 1.7, + "learning_rate": 3.3750584668341453e-07, + "loss": 0.8581, + "step": 8155 + }, + { + "epoch": 1.7, + "learning_rate": 3.37052833873106e-07, + "loss": 0.7845, + "step": 8156 + }, + { + "epoch": 1.7, + "learning_rate": 3.366001071962804e-07, + "loss": 0.8936, + "step": 8157 + }, + { + "epoch": 1.7, + "learning_rate": 3.361476667015838e-07, + "loss": 0.7537, + "step": 8158 + }, + { + "epoch": 1.7, + "learning_rate": 3.356955124376296e-07, + "loss": 1.0799, + "step": 8159 + }, + { + "epoch": 1.7, + "learning_rate": 3.352436444530027e-07, + "loss": 0.7489, + "step": 8160 + }, + { + "epoch": 1.7, + "learning_rate": 3.347920627962559e-07, + "loss": 1.2789, + "step": 8161 + }, + { + "epoch": 1.7, + "learning_rate": 3.3434076751591225e-07, + "loss": 0.8822, + "step": 8162 + }, + { + "epoch": 1.7, + "learning_rate": 3.3388975866046246e-07, + "loss": 0.8805, + "step": 8163 + }, + { + "epoch": 1.7, + "learning_rate": 3.33439036278368e-07, + "loss": 0.8482, + "step": 8164 + }, + { + "epoch": 1.7, + "learning_rate": 3.329886004180586e-07, + "loss": 0.891, + "step": 8165 + }, + { + "epoch": 1.7, + "learning_rate": 3.3253845112793467e-07, + "loss": 0.7625, + "step": 8166 + }, + { + "epoch": 1.7, + "learning_rate": 3.320885884563627e-07, + "loss": 0.9745, + "step": 8167 + }, + { + "epoch": 1.7, + "learning_rate": 3.3163901245168195e-07, + "loss": 0.87, + "step": 8168 + }, + { + "epoch": 1.7, + "learning_rate": 3.3118972316219875e-07, + "loss": 0.9544, + "step": 8169 + }, + { + "epoch": 1.7, + "learning_rate": 3.3074072063618985e-07, + "loss": 1.0106, + "step": 8170 + }, + { + "epoch": 1.7, + "learning_rate": 3.302920049218997e-07, + "loss": 0.7971, + "step": 8171 + }, + { + "epoch": 1.7, + "learning_rate": 3.29843576067543e-07, + "loss": 0.7658, + "step": 8172 + }, + { + "epoch": 1.7, + "learning_rate": 3.2939543412130357e-07, + "loss": 1.1107, + "step": 8173 + }, + { + "epoch": 1.7, + "learning_rate": 3.2894757913133457e-07, + "loss": 0.8406, + "step": 8174 + }, + { + "epoch": 1.7, + "learning_rate": 3.285000111457571e-07, + "loss": 1.0326, + "step": 8175 + }, + { + "epoch": 1.7, + "learning_rate": 3.280527302126628e-07, + "loss": 0.8816, + "step": 8176 + }, + { + "epoch": 1.7, + "learning_rate": 3.2760573638011217e-07, + "loss": 0.9683, + "step": 8177 + }, + { + "epoch": 1.7, + "learning_rate": 3.2715902969613477e-07, + "loss": 0.8516, + "step": 8178 + }, + { + "epoch": 1.7, + "learning_rate": 3.2671261020872854e-07, + "loss": 0.7938, + "step": 8179 + }, + { + "epoch": 1.7, + "learning_rate": 3.262664779658615e-07, + "loss": 0.9976, + "step": 8180 + }, + { + "epoch": 1.7, + "learning_rate": 3.258206330154706e-07, + "loss": 1.0757, + "step": 8181 + }, + { + "epoch": 1.7, + "learning_rate": 3.2537507540546217e-07, + "loss": 0.9211, + "step": 8182 + }, + { + "epoch": 1.7, + "learning_rate": 3.24929805183711e-07, + "loss": 0.825, + "step": 8183 + }, + { + "epoch": 1.7, + "learning_rate": 3.24484822398061e-07, + "loss": 0.9945, + "step": 8184 + }, + { + "epoch": 1.7, + "learning_rate": 3.2404012709632566e-07, + "loss": 0.8818, + "step": 8185 + }, + { + "epoch": 1.7, + "learning_rate": 3.2359571932628884e-07, + "loss": 0.9889, + "step": 8186 + }, + { + "epoch": 1.7, + "learning_rate": 3.2315159913570025e-07, + "loss": 0.8909, + "step": 8187 + }, + { + "epoch": 1.7, + "learning_rate": 3.227077665722811e-07, + "loss": 0.9768, + "step": 8188 + }, + { + "epoch": 1.7, + "learning_rate": 3.222642216837215e-07, + "loss": 1.0625, + "step": 8189 + }, + { + "epoch": 1.7, + "learning_rate": 3.218209645176811e-07, + "loss": 0.9741, + "step": 8190 + }, + { + "epoch": 1.7, + "learning_rate": 3.21377995121786e-07, + "loss": 0.902, + "step": 8191 + }, + { + "epoch": 1.7, + "learning_rate": 3.209353135436343e-07, + "loss": 0.9376, + "step": 8192 + }, + { + "epoch": 1.7, + "learning_rate": 3.2049291983079176e-07, + "loss": 0.846, + "step": 8193 + }, + { + "epoch": 1.7, + "learning_rate": 3.200508140307945e-07, + "loss": 1.2197, + "step": 8194 + }, + { + "epoch": 1.7, + "learning_rate": 3.196089961911455e-07, + "loss": 0.9164, + "step": 8195 + }, + { + "epoch": 1.7, + "learning_rate": 3.191674663593187e-07, + "loss": 1.0211, + "step": 8196 + }, + { + "epoch": 1.7, + "learning_rate": 3.1872622458275624e-07, + "loss": 0.9487, + "step": 8197 + }, + { + "epoch": 1.71, + "learning_rate": 3.182852709088702e-07, + "loss": 1.0808, + "step": 8198 + }, + { + "epoch": 1.71, + "learning_rate": 3.178446053850399e-07, + "loss": 0.89, + "step": 8199 + }, + { + "epoch": 1.71, + "learning_rate": 3.174042280586152e-07, + "loss": 0.7864, + "step": 8200 + }, + { + "epoch": 1.71, + "learning_rate": 3.16964138976915e-07, + "loss": 0.7512, + "step": 8201 + }, + { + "epoch": 1.71, + "learning_rate": 3.1652433818722704e-07, + "loss": 0.954, + "step": 8202 + }, + { + "epoch": 1.71, + "learning_rate": 3.1608482573680684e-07, + "loss": 0.8508, + "step": 8203 + }, + { + "epoch": 1.71, + "learning_rate": 3.156456016728807e-07, + "loss": 1.0839, + "step": 8204 + }, + { + "epoch": 1.71, + "learning_rate": 3.1520666604264314e-07, + "loss": 0.9729, + "step": 8205 + }, + { + "epoch": 1.71, + "learning_rate": 3.1476801889325814e-07, + "loss": 0.9791, + "step": 8206 + }, + { + "epoch": 1.71, + "learning_rate": 3.1432966027185763e-07, + "loss": 1.0023, + "step": 8207 + }, + { + "epoch": 1.71, + "learning_rate": 3.1389159022554323e-07, + "loss": 0.9817, + "step": 8208 + }, + { + "epoch": 1.71, + "learning_rate": 3.1345380880138575e-07, + "loss": 0.9138, + "step": 8209 + }, + { + "epoch": 1.71, + "learning_rate": 3.1301631604642546e-07, + "loss": 0.9273, + "step": 8210 + }, + { + "epoch": 1.71, + "learning_rate": 3.1257911200766974e-07, + "loss": 0.7316, + "step": 8211 + }, + { + "epoch": 1.71, + "learning_rate": 3.121421967320964e-07, + "loss": 0.8015, + "step": 8212 + }, + { + "epoch": 1.71, + "learning_rate": 3.117055702666526e-07, + "loss": 0.7432, + "step": 8213 + }, + { + "epoch": 1.71, + "learning_rate": 3.11269232658254e-07, + "loss": 0.7637, + "step": 8214 + }, + { + "epoch": 1.71, + "learning_rate": 3.1083318395378355e-07, + "loss": 0.8206, + "step": 8215 + }, + { + "epoch": 1.71, + "learning_rate": 3.1039742420009576e-07, + "loss": 0.7094, + "step": 8216 + }, + { + "epoch": 1.71, + "learning_rate": 3.0996195344401313e-07, + "loss": 0.7499, + "step": 8217 + }, + { + "epoch": 1.71, + "learning_rate": 3.0952677173232693e-07, + "loss": 0.8856, + "step": 8218 + }, + { + "epoch": 1.71, + "learning_rate": 3.09091879111796e-07, + "loss": 0.8248, + "step": 8219 + }, + { + "epoch": 1.71, + "learning_rate": 3.086572756291518e-07, + "loss": 0.8374, + "step": 8220 + }, + { + "epoch": 1.71, + "learning_rate": 3.082229613310905e-07, + "loss": 1.1268, + "step": 8221 + }, + { + "epoch": 1.71, + "learning_rate": 3.0778893626428083e-07, + "loss": 0.9044, + "step": 8222 + }, + { + "epoch": 1.71, + "learning_rate": 3.073552004753565e-07, + "loss": 1.2934, + "step": 8223 + }, + { + "epoch": 1.71, + "learning_rate": 3.0692175401092483e-07, + "loss": 0.7397, + "step": 8224 + }, + { + "epoch": 1.71, + "learning_rate": 3.0648859691755826e-07, + "loss": 0.9173, + "step": 8225 + }, + { + "epoch": 1.71, + "learning_rate": 3.060557292417999e-07, + "loss": 0.9042, + "step": 8226 + }, + { + "epoch": 1.71, + "learning_rate": 3.0562315103016025e-07, + "loss": 0.9114, + "step": 8227 + }, + { + "epoch": 1.71, + "learning_rate": 3.0519086232912206e-07, + "loss": 0.9121, + "step": 8228 + }, + { + "epoch": 1.71, + "learning_rate": 3.047588631851326e-07, + "loss": 0.8754, + "step": 8229 + }, + { + "epoch": 1.71, + "learning_rate": 3.0432715364461163e-07, + "loss": 0.9912, + "step": 8230 + }, + { + "epoch": 1.71, + "learning_rate": 3.0389573375394476e-07, + "loss": 0.7548, + "step": 8231 + }, + { + "epoch": 1.71, + "learning_rate": 3.034646035594896e-07, + "loss": 0.9581, + "step": 8232 + }, + { + "epoch": 1.71, + "learning_rate": 3.0303376310757003e-07, + "loss": 0.9557, + "step": 8233 + }, + { + "epoch": 1.71, + "learning_rate": 3.026032124444807e-07, + "loss": 0.8762, + "step": 8234 + }, + { + "epoch": 1.71, + "learning_rate": 3.021729516164824e-07, + "loss": 0.8008, + "step": 8235 + }, + { + "epoch": 1.71, + "learning_rate": 3.017429806698093e-07, + "loss": 0.8546, + "step": 8236 + }, + { + "epoch": 1.71, + "learning_rate": 3.013132996506599e-07, + "loss": 0.8714, + "step": 8237 + }, + { + "epoch": 1.71, + "learning_rate": 3.008839086052042e-07, + "loss": 0.8654, + "step": 8238 + }, + { + "epoch": 1.71, + "learning_rate": 3.00454807579579e-07, + "loss": 1.0182, + "step": 8239 + }, + { + "epoch": 1.71, + "learning_rate": 3.0002599661989307e-07, + "loss": 0.8565, + "step": 8240 + }, + { + "epoch": 1.71, + "learning_rate": 2.995974757722205e-07, + "loss": 1.0045, + "step": 8241 + }, + { + "epoch": 1.71, + "learning_rate": 2.991692450826072e-07, + "loss": 0.828, + "step": 8242 + }, + { + "epoch": 1.71, + "learning_rate": 2.9874130459706474e-07, + "loss": 1.1619, + "step": 8243 + }, + { + "epoch": 1.71, + "learning_rate": 2.983136543615772e-07, + "loss": 0.9915, + "step": 8244 + }, + { + "epoch": 1.71, + "learning_rate": 2.9788629442209423e-07, + "loss": 0.7929, + "step": 8245 + }, + { + "epoch": 1.72, + "learning_rate": 2.974592248245368e-07, + "loss": 0.9426, + "step": 8246 + }, + { + "epoch": 1.72, + "learning_rate": 2.970324456147915e-07, + "loss": 0.948, + "step": 8247 + }, + { + "epoch": 1.72, + "learning_rate": 2.9660595683871826e-07, + "loss": 0.8337, + "step": 8248 + }, + { + "epoch": 1.72, + "learning_rate": 2.961797585421415e-07, + "loss": 0.9395, + "step": 8249 + }, + { + "epoch": 1.72, + "learning_rate": 2.9575385077085724e-07, + "loss": 0.8488, + "step": 8250 + }, + { + "epoch": 1.72, + "learning_rate": 2.9532823357062787e-07, + "loss": 1.0389, + "step": 8251 + }, + { + "epoch": 1.72, + "learning_rate": 2.949029069871878e-07, + "loss": 0.8083, + "step": 8252 + }, + { + "epoch": 1.72, + "learning_rate": 2.944778710662365e-07, + "loss": 0.9471, + "step": 8253 + }, + { + "epoch": 1.72, + "learning_rate": 2.940531258534457e-07, + "loss": 1.0087, + "step": 8254 + }, + { + "epoch": 1.72, + "learning_rate": 2.9362867139445224e-07, + "loss": 0.876, + "step": 8255 + }, + { + "epoch": 1.72, + "learning_rate": 2.932045077348661e-07, + "loss": 0.9568, + "step": 8256 + }, + { + "epoch": 1.72, + "learning_rate": 2.92780634920262e-07, + "loss": 0.8679, + "step": 8257 + }, + { + "epoch": 1.72, + "learning_rate": 2.9235705299618587e-07, + "loss": 0.8319, + "step": 8258 + }, + { + "epoch": 1.72, + "learning_rate": 2.9193376200815003e-07, + "loss": 0.8398, + "step": 8259 + }, + { + "epoch": 1.72, + "learning_rate": 2.915107620016394e-07, + "loss": 1.2952, + "step": 8260 + }, + { + "epoch": 1.72, + "learning_rate": 2.910880530221036e-07, + "loss": 0.9001, + "step": 8261 + }, + { + "epoch": 1.72, + "learning_rate": 2.90665635114964e-07, + "loss": 1.0217, + "step": 8262 + }, + { + "epoch": 1.72, + "learning_rate": 2.90243508325607e-07, + "loss": 0.8097, + "step": 8263 + }, + { + "epoch": 1.72, + "learning_rate": 2.898216726993932e-07, + "loss": 1.0769, + "step": 8264 + }, + { + "epoch": 1.72, + "learning_rate": 2.894001282816464e-07, + "loss": 0.6935, + "step": 8265 + }, + { + "epoch": 1.72, + "learning_rate": 2.8897887511766307e-07, + "loss": 0.7199, + "step": 8266 + }, + { + "epoch": 1.72, + "learning_rate": 2.8855791325270496e-07, + "loss": 1.0554, + "step": 8267 + }, + { + "epoch": 1.72, + "learning_rate": 2.881372427320069e-07, + "loss": 0.8693, + "step": 8268 + }, + { + "epoch": 1.72, + "learning_rate": 2.8771686360076774e-07, + "loss": 1.0646, + "step": 8269 + }, + { + "epoch": 1.72, + "learning_rate": 2.872967759041587e-07, + "loss": 0.827, + "step": 8270 + }, + { + "epoch": 1.72, + "learning_rate": 2.868769796873166e-07, + "loss": 0.8817, + "step": 8271 + }, + { + "epoch": 1.72, + "learning_rate": 2.8645747499535046e-07, + "loss": 1.0038, + "step": 8272 + }, + { + "epoch": 1.72, + "learning_rate": 2.8603826187333414e-07, + "loss": 0.8198, + "step": 8273 + }, + { + "epoch": 1.72, + "learning_rate": 2.85619340366314e-07, + "loss": 1.0184, + "step": 8274 + }, + { + "epoch": 1.72, + "learning_rate": 2.852007105193006e-07, + "loss": 1.0134, + "step": 8275 + }, + { + "epoch": 1.72, + "learning_rate": 2.847823723772781e-07, + "loss": 0.7844, + "step": 8276 + }, + { + "epoch": 1.72, + "learning_rate": 2.843643259851958e-07, + "loss": 0.9915, + "step": 8277 + }, + { + "epoch": 1.72, + "learning_rate": 2.8394657138797307e-07, + "loss": 0.9971, + "step": 8278 + }, + { + "epoch": 1.72, + "learning_rate": 2.8352910863049643e-07, + "loss": 1.0649, + "step": 8279 + }, + { + "epoch": 1.72, + "learning_rate": 2.831119377576246e-07, + "loss": 0.8886, + "step": 8280 + }, + { + "epoch": 1.72, + "learning_rate": 2.8269505881418033e-07, + "loss": 0.9326, + "step": 8281 + }, + { + "epoch": 1.72, + "learning_rate": 2.822784718449586e-07, + "loss": 0.831, + "step": 8282 + }, + { + "epoch": 1.72, + "learning_rate": 2.818621768947202e-07, + "loss": 0.9506, + "step": 8283 + }, + { + "epoch": 1.72, + "learning_rate": 2.8144617400819797e-07, + "loss": 0.8313, + "step": 8284 + }, + { + "epoch": 1.72, + "learning_rate": 2.8103046323008984e-07, + "loss": 1.0113, + "step": 8285 + }, + { + "epoch": 1.72, + "learning_rate": 2.806150446050645e-07, + "loss": 0.9598, + "step": 8286 + }, + { + "epoch": 1.72, + "learning_rate": 2.801999181777581e-07, + "loss": 0.8973, + "step": 8287 + }, + { + "epoch": 1.72, + "learning_rate": 2.7978508399277746e-07, + "loss": 0.8961, + "step": 8288 + }, + { + "epoch": 1.72, + "learning_rate": 2.793705420946948e-07, + "loss": 0.7348, + "step": 8289 + }, + { + "epoch": 1.72, + "learning_rate": 2.789562925280532e-07, + "loss": 0.8287, + "step": 8290 + }, + { + "epoch": 1.72, + "learning_rate": 2.7854233533736375e-07, + "loss": 0.9395, + "step": 8291 + }, + { + "epoch": 1.72, + "learning_rate": 2.781286705671068e-07, + "loss": 0.7509, + "step": 8292 + }, + { + "epoch": 1.72, + "learning_rate": 2.777152982617295e-07, + "loss": 0.8911, + "step": 8293 + }, + { + "epoch": 1.73, + "learning_rate": 2.77302218465649e-07, + "loss": 0.9557, + "step": 8294 + }, + { + "epoch": 1.73, + "learning_rate": 2.768894312232507e-07, + "loss": 0.8702, + "step": 8295 + }, + { + "epoch": 1.73, + "learning_rate": 2.7647693657888963e-07, + "loss": 0.7279, + "step": 8296 + }, + { + "epoch": 1.73, + "learning_rate": 2.760647345768865e-07, + "loss": 0.9198, + "step": 8297 + }, + { + "epoch": 1.73, + "learning_rate": 2.756528252615334e-07, + "loss": 0.9519, + "step": 8298 + }, + { + "epoch": 1.73, + "learning_rate": 2.7524120867708967e-07, + "loss": 0.9801, + "step": 8299 + }, + { + "epoch": 1.73, + "learning_rate": 2.748298848677845e-07, + "loss": 0.853, + "step": 8300 + }, + { + "epoch": 1.73, + "learning_rate": 2.7441885387781276e-07, + "loss": 0.8844, + "step": 8301 + }, + { + "epoch": 1.73, + "learning_rate": 2.740081157513411e-07, + "loss": 1.2271, + "step": 8302 + }, + { + "epoch": 1.73, + "learning_rate": 2.735976705325026e-07, + "loss": 0.9797, + "step": 8303 + }, + { + "epoch": 1.73, + "learning_rate": 2.7318751826540033e-07, + "loss": 0.7968, + "step": 8304 + }, + { + "epoch": 1.73, + "learning_rate": 2.7277765899410435e-07, + "loss": 0.7908, + "step": 8305 + }, + { + "epoch": 1.73, + "learning_rate": 2.723680927626544e-07, + "loss": 0.7797, + "step": 8306 + }, + { + "epoch": 1.73, + "learning_rate": 2.7195881961505795e-07, + "loss": 0.8094, + "step": 8307 + }, + { + "epoch": 1.73, + "learning_rate": 2.7154983959529223e-07, + "loss": 1.1067, + "step": 8308 + }, + { + "epoch": 1.73, + "learning_rate": 2.71141152747301e-07, + "loss": 1.3726, + "step": 8309 + }, + { + "epoch": 1.73, + "learning_rate": 2.7073275911499853e-07, + "loss": 0.9544, + "step": 8310 + }, + { + "epoch": 1.73, + "learning_rate": 2.70324658742266e-07, + "loss": 0.7113, + "step": 8311 + }, + { + "epoch": 1.73, + "learning_rate": 2.6991685167295464e-07, + "loss": 1.203, + "step": 8312 + }, + { + "epoch": 1.73, + "learning_rate": 2.6950933795088216e-07, + "loss": 0.9046, + "step": 8313 + }, + { + "epoch": 1.73, + "learning_rate": 2.6910211761983684e-07, + "loss": 0.858, + "step": 8314 + }, + { + "epoch": 1.73, + "learning_rate": 2.6869519072357365e-07, + "loss": 1.0448, + "step": 8315 + }, + { + "epoch": 1.73, + "learning_rate": 2.6828855730581826e-07, + "loss": 0.7881, + "step": 8316 + }, + { + "epoch": 1.73, + "learning_rate": 2.6788221741026177e-07, + "loss": 0.7479, + "step": 8317 + }, + { + "epoch": 1.73, + "learning_rate": 2.674761710805659e-07, + "loss": 0.7672, + "step": 8318 + }, + { + "epoch": 1.73, + "learning_rate": 2.670704183603604e-07, + "loss": 0.8514, + "step": 8319 + }, + { + "epoch": 1.73, + "learning_rate": 2.666649592932444e-07, + "loss": 0.8064, + "step": 8320 + }, + { + "epoch": 1.73, + "learning_rate": 2.662597939227824e-07, + "loss": 0.8233, + "step": 8321 + }, + { + "epoch": 1.73, + "learning_rate": 2.6585492229251095e-07, + "loss": 1.1701, + "step": 8322 + }, + { + "epoch": 1.73, + "learning_rate": 2.6545034444593285e-07, + "loss": 0.8552, + "step": 8323 + }, + { + "epoch": 1.73, + "learning_rate": 2.650460604265204e-07, + "loss": 0.8876, + "step": 8324 + }, + { + "epoch": 1.73, + "learning_rate": 2.6464207027771337e-07, + "loss": 0.9872, + "step": 8325 + }, + { + "epoch": 1.73, + "learning_rate": 2.642383740429205e-07, + "loss": 1.0416, + "step": 8326 + }, + { + "epoch": 1.73, + "learning_rate": 2.638349717655194e-07, + "loss": 0.9748, + "step": 8327 + }, + { + "epoch": 1.73, + "learning_rate": 2.634318634888554e-07, + "loss": 0.7033, + "step": 8328 + }, + { + "epoch": 1.73, + "learning_rate": 2.6302904925624217e-07, + "loss": 0.903, + "step": 8329 + }, + { + "epoch": 1.73, + "learning_rate": 2.626265291109624e-07, + "loss": 0.9092, + "step": 8330 + }, + { + "epoch": 1.73, + "learning_rate": 2.6222430309626653e-07, + "loss": 0.8734, + "step": 8331 + }, + { + "epoch": 1.73, + "learning_rate": 2.618223712553747e-07, + "loss": 1.0344, + "step": 8332 + }, + { + "epoch": 1.73, + "learning_rate": 2.614207336314729e-07, + "loss": 0.9636, + "step": 8333 + }, + { + "epoch": 1.73, + "learning_rate": 2.610193902677177e-07, + "loss": 0.8611, + "step": 8334 + }, + { + "epoch": 1.73, + "learning_rate": 2.6061834120723395e-07, + "loss": 0.8371, + "step": 8335 + }, + { + "epoch": 1.73, + "learning_rate": 2.6021758649311443e-07, + "loss": 0.903, + "step": 8336 + }, + { + "epoch": 1.73, + "learning_rate": 2.598171261684191e-07, + "loss": 1.1283, + "step": 8337 + }, + { + "epoch": 1.73, + "learning_rate": 2.594169602761779e-07, + "loss": 0.765, + "step": 8338 + }, + { + "epoch": 1.73, + "learning_rate": 2.59017088859389e-07, + "loss": 1.0879, + "step": 8339 + }, + { + "epoch": 1.73, + "learning_rate": 2.586175119610188e-07, + "loss": 0.8187, + "step": 8340 + }, + { + "epoch": 1.73, + "learning_rate": 2.5821822962400057e-07, + "loss": 1.0289, + "step": 8341 + }, + { + "epoch": 1.74, + "learning_rate": 2.5781924189123797e-07, + "loss": 0.951, + "step": 8342 + }, + { + "epoch": 1.74, + "learning_rate": 2.574205488056024e-07, + "loss": 0.9965, + "step": 8343 + }, + { + "epoch": 1.74, + "learning_rate": 2.570221504099336e-07, + "loss": 1.0664, + "step": 8344 + }, + { + "epoch": 1.74, + "learning_rate": 2.566240467470383e-07, + "loss": 0.8509, + "step": 8345 + }, + { + "epoch": 1.74, + "learning_rate": 2.5622623785969336e-07, + "loss": 1.0041, + "step": 8346 + }, + { + "epoch": 1.74, + "learning_rate": 2.558287237906438e-07, + "loss": 1.0232, + "step": 8347 + }, + { + "epoch": 1.74, + "learning_rate": 2.5543150458260224e-07, + "loss": 1.1568, + "step": 8348 + }, + { + "epoch": 1.74, + "learning_rate": 2.550345802782491e-07, + "loss": 0.7803, + "step": 8349 + }, + { + "epoch": 1.74, + "learning_rate": 2.546379509202347e-07, + "loss": 0.9046, + "step": 8350 + }, + { + "epoch": 1.74, + "learning_rate": 2.5424161655117616e-07, + "loss": 0.748, + "step": 8351 + }, + { + "epoch": 1.74, + "learning_rate": 2.5384557721366084e-07, + "loss": 0.867, + "step": 8352 + }, + { + "epoch": 1.74, + "learning_rate": 2.534498329502414e-07, + "loss": 1.1901, + "step": 8353 + }, + { + "epoch": 1.74, + "learning_rate": 2.5305438380344206e-07, + "loss": 0.7008, + "step": 8354 + }, + { + "epoch": 1.74, + "learning_rate": 2.5265922981575286e-07, + "loss": 0.9918, + "step": 8355 + }, + { + "epoch": 1.74, + "learning_rate": 2.5226437102963386e-07, + "loss": 0.7941, + "step": 8356 + }, + { + "epoch": 1.74, + "learning_rate": 2.5186980748751143e-07, + "loss": 1.0346, + "step": 8357 + }, + { + "epoch": 1.74, + "learning_rate": 2.5147553923178235e-07, + "loss": 0.8304, + "step": 8358 + }, + { + "epoch": 1.74, + "learning_rate": 2.510815663048104e-07, + "loss": 0.7131, + "step": 8359 + }, + { + "epoch": 1.74, + "learning_rate": 2.5068788874892837e-07, + "loss": 0.9564, + "step": 8360 + }, + { + "epoch": 1.74, + "learning_rate": 2.5029450660643603e-07, + "loss": 0.8812, + "step": 8361 + }, + { + "epoch": 1.74, + "learning_rate": 2.499014199196027e-07, + "loss": 1.1033, + "step": 8362 + }, + { + "epoch": 1.74, + "learning_rate": 2.4950862873066583e-07, + "loss": 0.877, + "step": 8363 + }, + { + "epoch": 1.74, + "learning_rate": 2.491161330818307e-07, + "loss": 1.1383, + "step": 8364 + }, + { + "epoch": 1.74, + "learning_rate": 2.487239330152705e-07, + "loss": 0.8328, + "step": 8365 + }, + { + "epoch": 1.74, + "learning_rate": 2.483320285731273e-07, + "loss": 0.6989, + "step": 8366 + }, + { + "epoch": 1.74, + "learning_rate": 2.4794041979751103e-07, + "loss": 0.859, + "step": 8367 + }, + { + "epoch": 1.74, + "learning_rate": 2.47549106730501e-07, + "loss": 0.778, + "step": 8368 + }, + { + "epoch": 1.74, + "learning_rate": 2.471580894141426e-07, + "loss": 0.8627, + "step": 8369 + }, + { + "epoch": 1.74, + "learning_rate": 2.4676736789045097e-07, + "loss": 0.8972, + "step": 8370 + }, + { + "epoch": 1.74, + "learning_rate": 2.463769422014094e-07, + "loss": 0.8376, + "step": 8371 + }, + { + "epoch": 1.74, + "learning_rate": 2.4598681238896904e-07, + "loss": 1.051, + "step": 8372 + }, + { + "epoch": 1.74, + "learning_rate": 2.45596978495049e-07, + "loss": 0.7442, + "step": 8373 + }, + { + "epoch": 1.74, + "learning_rate": 2.452074405615369e-07, + "loss": 0.9254, + "step": 8374 + }, + { + "epoch": 1.74, + "learning_rate": 2.448181986302888e-07, + "loss": 0.7524, + "step": 8375 + }, + { + "epoch": 1.74, + "learning_rate": 2.444292527431289e-07, + "loss": 0.8102, + "step": 8376 + }, + { + "epoch": 1.74, + "learning_rate": 2.4404060294184883e-07, + "loss": 1.039, + "step": 8377 + }, + { + "epoch": 1.74, + "learning_rate": 2.436522492682095e-07, + "loss": 0.7539, + "step": 8378 + }, + { + "epoch": 1.74, + "learning_rate": 2.432641917639392e-07, + "loss": 0.8763, + "step": 8379 + }, + { + "epoch": 1.74, + "learning_rate": 2.4287643047073526e-07, + "loss": 0.823, + "step": 8380 + }, + { + "epoch": 1.74, + "learning_rate": 2.4248896543026134e-07, + "loss": 0.7923, + "step": 8381 + }, + { + "epoch": 1.74, + "learning_rate": 2.421017966841518e-07, + "loss": 0.759, + "step": 8382 + }, + { + "epoch": 1.74, + "learning_rate": 2.41714924274007e-07, + "loss": 0.7043, + "step": 8383 + }, + { + "epoch": 1.74, + "learning_rate": 2.413283482413974e-07, + "loss": 0.649, + "step": 8384 + }, + { + "epoch": 1.74, + "learning_rate": 2.409420686278594e-07, + "loss": 0.7221, + "step": 8385 + }, + { + "epoch": 1.74, + "learning_rate": 2.4055608547489915e-07, + "loss": 0.884, + "step": 8386 + }, + { + "epoch": 1.74, + "learning_rate": 2.401703988239908e-07, + "loss": 0.9378, + "step": 8387 + }, + { + "epoch": 1.74, + "learning_rate": 2.397850087165766e-07, + "loss": 0.8673, + "step": 8388 + }, + { + "epoch": 1.74, + "learning_rate": 2.39399915194066e-07, + "loss": 1.0816, + "step": 8389 + }, + { + "epoch": 1.75, + "learning_rate": 2.390151182978373e-07, + "loss": 0.7852, + "step": 8390 + }, + { + "epoch": 1.75, + "learning_rate": 2.3863061806923703e-07, + "loss": 0.9791, + "step": 8391 + }, + { + "epoch": 1.75, + "learning_rate": 2.3824641454958017e-07, + "loss": 0.8425, + "step": 8392 + }, + { + "epoch": 1.75, + "learning_rate": 2.3786250778014874e-07, + "loss": 1.0891, + "step": 8393 + }, + { + "epoch": 1.75, + "learning_rate": 2.3747889780219367e-07, + "loss": 0.7327, + "step": 8394 + }, + { + "epoch": 1.75, + "learning_rate": 2.3709558465693405e-07, + "loss": 0.9099, + "step": 8395 + }, + { + "epoch": 1.75, + "learning_rate": 2.3671256838555688e-07, + "loss": 0.7309, + "step": 8396 + }, + { + "epoch": 1.75, + "learning_rate": 2.3632984902921695e-07, + "loss": 0.7133, + "step": 8397 + }, + { + "epoch": 1.75, + "learning_rate": 2.3594742662903734e-07, + "loss": 0.8115, + "step": 8398 + }, + { + "epoch": 1.75, + "learning_rate": 2.3556530122610953e-07, + "loss": 0.7641, + "step": 8399 + }, + { + "epoch": 1.75, + "learning_rate": 2.3518347286149332e-07, + "loss": 0.8102, + "step": 8400 + }, + { + "epoch": 1.75, + "learning_rate": 2.3480194157621493e-07, + "loss": 0.7791, + "step": 8401 + }, + { + "epoch": 1.75, + "learning_rate": 2.3442070741127087e-07, + "loss": 0.887, + "step": 8402 + }, + { + "epoch": 1.75, + "learning_rate": 2.3403977040762437e-07, + "loss": 0.9863, + "step": 8403 + }, + { + "epoch": 1.75, + "learning_rate": 2.336591306062077e-07, + "loss": 0.964, + "step": 8404 + }, + { + "epoch": 1.75, + "learning_rate": 2.3327878804791947e-07, + "loss": 0.9146, + "step": 8405 + }, + { + "epoch": 1.75, + "learning_rate": 2.3289874277362832e-07, + "loss": 0.9268, + "step": 8406 + }, + { + "epoch": 1.75, + "learning_rate": 2.3251899482416928e-07, + "loss": 0.8979, + "step": 8407 + }, + { + "epoch": 1.75, + "learning_rate": 2.3213954424034766e-07, + "loss": 0.8608, + "step": 8408 + }, + { + "epoch": 1.75, + "learning_rate": 2.317603910629339e-07, + "loss": 0.7617, + "step": 8409 + }, + { + "epoch": 1.75, + "learning_rate": 2.313815353326687e-07, + "loss": 0.8252, + "step": 8410 + }, + { + "epoch": 1.75, + "learning_rate": 2.3100297709025985e-07, + "loss": 0.7954, + "step": 8411 + }, + { + "epoch": 1.75, + "learning_rate": 2.3062471637638382e-07, + "loss": 0.8687, + "step": 8412 + }, + { + "epoch": 1.75, + "learning_rate": 2.302467532316841e-07, + "loss": 0.8955, + "step": 8413 + }, + { + "epoch": 1.75, + "learning_rate": 2.2986908769677317e-07, + "loss": 0.7862, + "step": 8414 + }, + { + "epoch": 1.75, + "learning_rate": 2.2949171981223095e-07, + "loss": 0.7831, + "step": 8415 + }, + { + "epoch": 1.75, + "learning_rate": 2.2911464961860663e-07, + "loss": 0.9513, + "step": 8416 + }, + { + "epoch": 1.75, + "learning_rate": 2.2873787715641448e-07, + "loss": 0.906, + "step": 8417 + }, + { + "epoch": 1.75, + "learning_rate": 2.283614024661398e-07, + "loss": 1.1093, + "step": 8418 + }, + { + "epoch": 1.75, + "learning_rate": 2.2798522558823488e-07, + "loss": 0.7689, + "step": 8419 + }, + { + "epoch": 1.75, + "learning_rate": 2.2760934656311938e-07, + "loss": 0.9273, + "step": 8420 + }, + { + "epoch": 1.75, + "learning_rate": 2.2723376543118236e-07, + "loss": 0.8903, + "step": 8421 + }, + { + "epoch": 1.75, + "learning_rate": 2.2685848223277882e-07, + "loss": 1.0645, + "step": 8422 + }, + { + "epoch": 1.75, + "learning_rate": 2.2648349700823356e-07, + "loss": 1.0127, + "step": 8423 + }, + { + "epoch": 1.75, + "learning_rate": 2.2610880979783865e-07, + "loss": 0.8048, + "step": 8424 + }, + { + "epoch": 1.75, + "learning_rate": 2.2573442064185456e-07, + "loss": 0.8486, + "step": 8425 + }, + { + "epoch": 1.75, + "learning_rate": 2.253603295805088e-07, + "loss": 1.0231, + "step": 8426 + }, + { + "epoch": 1.75, + "learning_rate": 2.2498653665399749e-07, + "loss": 0.7626, + "step": 8427 + }, + { + "epoch": 1.75, + "learning_rate": 2.2461304190248456e-07, + "loss": 1.2366, + "step": 8428 + }, + { + "epoch": 1.75, + "learning_rate": 2.2423984536610288e-07, + "loss": 0.9989, + "step": 8429 + }, + { + "epoch": 1.75, + "learning_rate": 2.2386694708495136e-07, + "loss": 0.821, + "step": 8430 + }, + { + "epoch": 1.75, + "learning_rate": 2.2349434709909832e-07, + "loss": 0.9894, + "step": 8431 + }, + { + "epoch": 1.75, + "learning_rate": 2.231220454485794e-07, + "loss": 0.8654, + "step": 8432 + }, + { + "epoch": 1.75, + "learning_rate": 2.227500421733989e-07, + "loss": 1.0305, + "step": 8433 + }, + { + "epoch": 1.75, + "learning_rate": 2.2237833731352786e-07, + "loss": 0.8464, + "step": 8434 + }, + { + "epoch": 1.75, + "learning_rate": 2.2200693090890634e-07, + "loss": 1.1446, + "step": 8435 + }, + { + "epoch": 1.75, + "learning_rate": 2.2163582299944173e-07, + "loss": 0.8687, + "step": 8436 + }, + { + "epoch": 1.75, + "learning_rate": 2.2126501362501016e-07, + "loss": 0.9822, + "step": 8437 + }, + { + "epoch": 1.75, + "learning_rate": 2.2089450282545443e-07, + "loss": 0.9081, + "step": 8438 + }, + { + "epoch": 1.76, + "learning_rate": 2.2052429064058565e-07, + "loss": 0.8929, + "step": 8439 + }, + { + "epoch": 1.76, + "learning_rate": 2.201543771101837e-07, + "loss": 1.1023, + "step": 8440 + }, + { + "epoch": 1.76, + "learning_rate": 2.197847622739958e-07, + "loss": 0.864, + "step": 8441 + }, + { + "epoch": 1.76, + "learning_rate": 2.1941544617173682e-07, + "loss": 0.7626, + "step": 8442 + }, + { + "epoch": 1.76, + "learning_rate": 2.1904642884308935e-07, + "loss": 0.9156, + "step": 8443 + }, + { + "epoch": 1.76, + "learning_rate": 2.1867771032770466e-07, + "loss": 0.8261, + "step": 8444 + }, + { + "epoch": 1.76, + "learning_rate": 2.183092906652021e-07, + "loss": 0.843, + "step": 8445 + }, + { + "epoch": 1.76, + "learning_rate": 2.1794116989516733e-07, + "loss": 0.832, + "step": 8446 + }, + { + "epoch": 1.76, + "learning_rate": 2.1757334805715535e-07, + "loss": 0.844, + "step": 8447 + }, + { + "epoch": 1.76, + "learning_rate": 2.172058251906882e-07, + "loss": 0.9562, + "step": 8448 + }, + { + "epoch": 1.76, + "learning_rate": 2.1683860133525734e-07, + "loss": 0.9896, + "step": 8449 + }, + { + "epoch": 1.76, + "learning_rate": 2.1647167653031986e-07, + "loss": 0.8887, + "step": 8450 + }, + { + "epoch": 1.76, + "learning_rate": 2.1610505081530186e-07, + "loss": 0.8009, + "step": 8451 + }, + { + "epoch": 1.76, + "learning_rate": 2.157387242295975e-07, + "loss": 0.9511, + "step": 8452 + }, + { + "epoch": 1.76, + "learning_rate": 2.15372696812569e-07, + "loss": 0.7975, + "step": 8453 + }, + { + "epoch": 1.76, + "learning_rate": 2.1500696860354518e-07, + "loss": 0.9414, + "step": 8454 + }, + { + "epoch": 1.76, + "learning_rate": 2.1464153964182364e-07, + "loss": 0.7216, + "step": 8455 + }, + { + "epoch": 1.76, + "learning_rate": 2.142764099666703e-07, + "loss": 0.8764, + "step": 8456 + }, + { + "epoch": 1.76, + "learning_rate": 2.139115796173181e-07, + "loss": 0.856, + "step": 8457 + }, + { + "epoch": 1.76, + "learning_rate": 2.1354704863296735e-07, + "loss": 0.9402, + "step": 8458 + }, + { + "epoch": 1.76, + "learning_rate": 2.1318281705278742e-07, + "loss": 1.0123, + "step": 8459 + }, + { + "epoch": 1.76, + "learning_rate": 2.1281888491591493e-07, + "loss": 0.6571, + "step": 8460 + }, + { + "epoch": 1.76, + "learning_rate": 2.12455252261455e-07, + "loss": 0.8604, + "step": 8461 + }, + { + "epoch": 1.76, + "learning_rate": 2.120919191284787e-07, + "loss": 0.8741, + "step": 8462 + }, + { + "epoch": 1.76, + "learning_rate": 2.1172888555602677e-07, + "loss": 0.7081, + "step": 8463 + }, + { + "epoch": 1.76, + "learning_rate": 2.1136615158310736e-07, + "loss": 0.892, + "step": 8464 + }, + { + "epoch": 1.76, + "learning_rate": 2.1100371724869595e-07, + "loss": 0.8351, + "step": 8465 + }, + { + "epoch": 1.76, + "learning_rate": 2.1064158259173606e-07, + "loss": 0.9477, + "step": 8466 + }, + { + "epoch": 1.76, + "learning_rate": 2.1027974765113888e-07, + "loss": 1.0376, + "step": 8467 + }, + { + "epoch": 1.76, + "learning_rate": 2.099182124657837e-07, + "loss": 1.1415, + "step": 8468 + }, + { + "epoch": 1.76, + "learning_rate": 2.0955697707451804e-07, + "loss": 1.029, + "step": 8469 + }, + { + "epoch": 1.76, + "learning_rate": 2.0919604151615557e-07, + "loss": 1.0601, + "step": 8470 + }, + { + "epoch": 1.76, + "learning_rate": 2.0883540582947958e-07, + "loss": 0.8298, + "step": 8471 + }, + { + "epoch": 1.76, + "learning_rate": 2.0847507005323973e-07, + "loss": 1.0209, + "step": 8472 + }, + { + "epoch": 1.76, + "learning_rate": 2.0811503422615475e-07, + "loss": 1.042, + "step": 8473 + }, + { + "epoch": 1.76, + "learning_rate": 2.0775529838690998e-07, + "loss": 1.0013, + "step": 8474 + }, + { + "epoch": 1.76, + "learning_rate": 2.073958625741589e-07, + "loss": 0.8389, + "step": 8475 + }, + { + "epoch": 1.76, + "learning_rate": 2.070367268265232e-07, + "loss": 0.9519, + "step": 8476 + }, + { + "epoch": 1.76, + "learning_rate": 2.066778911825924e-07, + "loss": 0.9829, + "step": 8477 + }, + { + "epoch": 1.76, + "learning_rate": 2.0631935568092264e-07, + "loss": 0.7217, + "step": 8478 + }, + { + "epoch": 1.76, + "learning_rate": 2.0596112036003843e-07, + "loss": 0.9597, + "step": 8479 + }, + { + "epoch": 1.76, + "learning_rate": 2.0560318525843235e-07, + "loss": 0.7858, + "step": 8480 + }, + { + "epoch": 1.76, + "learning_rate": 2.0524555041456529e-07, + "loss": 0.7716, + "step": 8481 + }, + { + "epoch": 1.76, + "learning_rate": 2.0488821586686412e-07, + "loss": 0.676, + "step": 8482 + }, + { + "epoch": 1.76, + "learning_rate": 2.0453118165372488e-07, + "loss": 0.98, + "step": 8483 + }, + { + "epoch": 1.76, + "learning_rate": 2.041744478135098e-07, + "loss": 0.8541, + "step": 8484 + }, + { + "epoch": 1.76, + "learning_rate": 2.0381801438455196e-07, + "loss": 0.8107, + "step": 8485 + }, + { + "epoch": 1.76, + "learning_rate": 2.03461881405148e-07, + "loss": 0.9009, + "step": 8486 + }, + { + "epoch": 1.77, + "learning_rate": 2.0310604891356565e-07, + "loss": 0.8193, + "step": 8487 + }, + { + "epoch": 1.77, + "learning_rate": 2.027505169480387e-07, + "loss": 1.1523, + "step": 8488 + }, + { + "epoch": 1.77, + "learning_rate": 2.0239528554676957e-07, + "loss": 0.975, + "step": 8489 + }, + { + "epoch": 1.77, + "learning_rate": 2.0204035474792704e-07, + "loss": 0.9217, + "step": 8490 + }, + { + "epoch": 1.77, + "learning_rate": 2.0168572458964862e-07, + "loss": 1.1346, + "step": 8491 + }, + { + "epoch": 1.77, + "learning_rate": 2.013313951100395e-07, + "loss": 0.9395, + "step": 8492 + }, + { + "epoch": 1.77, + "learning_rate": 2.0097736634717256e-07, + "loss": 0.8031, + "step": 8493 + }, + { + "epoch": 1.77, + "learning_rate": 2.0062363833908737e-07, + "loss": 0.7065, + "step": 8494 + }, + { + "epoch": 1.77, + "learning_rate": 2.0027021112379284e-07, + "loss": 0.8936, + "step": 8495 + }, + { + "epoch": 1.77, + "learning_rate": 1.9991708473926396e-07, + "loss": 0.97, + "step": 8496 + }, + { + "epoch": 1.77, + "learning_rate": 1.9956425922344535e-07, + "loss": 0.8256, + "step": 8497 + }, + { + "epoch": 1.77, + "learning_rate": 1.9921173461424636e-07, + "loss": 0.8703, + "step": 8498 + }, + { + "epoch": 1.77, + "learning_rate": 1.98859510949547e-07, + "loss": 0.9512, + "step": 8499 + }, + { + "epoch": 1.77, + "learning_rate": 1.9850758826719306e-07, + "loss": 0.7937, + "step": 8500 + }, + { + "epoch": 1.77, + "learning_rate": 1.9815596660499924e-07, + "loss": 0.8825, + "step": 8501 + }, + { + "epoch": 1.77, + "learning_rate": 1.9780464600074666e-07, + "loss": 0.8541, + "step": 8502 + }, + { + "epoch": 1.77, + "learning_rate": 1.9745362649218478e-07, + "loss": 0.9076, + "step": 8503 + }, + { + "epoch": 1.77, + "learning_rate": 1.971029081170308e-07, + "loss": 1.0537, + "step": 8504 + }, + { + "epoch": 1.77, + "learning_rate": 1.9675249091296987e-07, + "loss": 0.8136, + "step": 8505 + }, + { + "epoch": 1.77, + "learning_rate": 1.9640237491765323e-07, + "loss": 0.9571, + "step": 8506 + }, + { + "epoch": 1.77, + "learning_rate": 1.960525601687011e-07, + "loss": 1.1271, + "step": 8507 + }, + { + "epoch": 1.77, + "learning_rate": 1.9570304670370142e-07, + "loss": 0.9708, + "step": 8508 + }, + { + "epoch": 1.77, + "learning_rate": 1.9535383456020983e-07, + "loss": 1.0329, + "step": 8509 + }, + { + "epoch": 1.77, + "learning_rate": 1.950049237757483e-07, + "loss": 0.9904, + "step": 8510 + }, + { + "epoch": 1.77, + "learning_rate": 1.9465631438780717e-07, + "loss": 0.8341, + "step": 8511 + }, + { + "epoch": 1.77, + "learning_rate": 1.9430800643384516e-07, + "loss": 0.8754, + "step": 8512 + }, + { + "epoch": 1.77, + "learning_rate": 1.9395999995128798e-07, + "loss": 1.0296, + "step": 8513 + }, + { + "epoch": 1.77, + "learning_rate": 1.9361229497752807e-07, + "loss": 0.7381, + "step": 8514 + }, + { + "epoch": 1.77, + "learning_rate": 1.9326489154992688e-07, + "loss": 0.7567, + "step": 8515 + }, + { + "epoch": 1.77, + "learning_rate": 1.9291778970581287e-07, + "loss": 0.8849, + "step": 8516 + }, + { + "epoch": 1.77, + "learning_rate": 1.9257098948248254e-07, + "loss": 0.8237, + "step": 8517 + }, + { + "epoch": 1.77, + "learning_rate": 1.922244909171984e-07, + "loss": 0.7484, + "step": 8518 + }, + { + "epoch": 1.77, + "learning_rate": 1.9187829404719236e-07, + "loss": 0.796, + "step": 8519 + }, + { + "epoch": 1.77, + "learning_rate": 1.9153239890966334e-07, + "loss": 0.9524, + "step": 8520 + }, + { + "epoch": 1.77, + "learning_rate": 1.9118680554177824e-07, + "loss": 0.9991, + "step": 8521 + }, + { + "epoch": 1.77, + "learning_rate": 1.9084151398066972e-07, + "loss": 0.742, + "step": 8522 + }, + { + "epoch": 1.77, + "learning_rate": 1.9049652426344012e-07, + "loss": 0.8947, + "step": 8523 + }, + { + "epoch": 1.77, + "learning_rate": 1.9015183642715872e-07, + "loss": 0.9968, + "step": 8524 + }, + { + "epoch": 1.77, + "learning_rate": 1.898074505088623e-07, + "loss": 1.0003, + "step": 8525 + }, + { + "epoch": 1.77, + "learning_rate": 1.8946336654555419e-07, + "loss": 0.8205, + "step": 8526 + }, + { + "epoch": 1.77, + "learning_rate": 1.891195845742072e-07, + "loss": 0.7508, + "step": 8527 + }, + { + "epoch": 1.77, + "learning_rate": 1.8877610463175976e-07, + "loss": 1.006, + "step": 8528 + }, + { + "epoch": 1.77, + "learning_rate": 1.8843292675512003e-07, + "loss": 0.7673, + "step": 8529 + }, + { + "epoch": 1.77, + "learning_rate": 1.8809005098116118e-07, + "loss": 1.0111, + "step": 8530 + }, + { + "epoch": 1.77, + "learning_rate": 1.8774747734672571e-07, + "loss": 0.8303, + "step": 8531 + }, + { + "epoch": 1.77, + "learning_rate": 1.874052058886232e-07, + "loss": 0.9373, + "step": 8532 + }, + { + "epoch": 1.77, + "learning_rate": 1.870632366436309e-07, + "loss": 1.0064, + "step": 8533 + }, + { + "epoch": 1.77, + "learning_rate": 1.8672156964849273e-07, + "loss": 1.1016, + "step": 8534 + }, + { + "epoch": 1.78, + "learning_rate": 1.8638020493992102e-07, + "loss": 0.9433, + "step": 8535 + }, + { + "epoch": 1.78, + "learning_rate": 1.8603914255459575e-07, + "loss": 0.8858, + "step": 8536 + }, + { + "epoch": 1.78, + "learning_rate": 1.8569838252916426e-07, + "loss": 0.8296, + "step": 8537 + }, + { + "epoch": 1.78, + "learning_rate": 1.8535792490023996e-07, + "loss": 0.9773, + "step": 8538 + }, + { + "epoch": 1.78, + "learning_rate": 1.850177697044062e-07, + "loss": 0.8625, + "step": 8539 + }, + { + "epoch": 1.78, + "learning_rate": 1.846779169782118e-07, + "loss": 0.7268, + "step": 8540 + }, + { + "epoch": 1.78, + "learning_rate": 1.8433836675817516e-07, + "loss": 0.803, + "step": 8541 + }, + { + "epoch": 1.78, + "learning_rate": 1.8399911908077948e-07, + "loss": 0.8632, + "step": 8542 + }, + { + "epoch": 1.78, + "learning_rate": 1.8366017398247758e-07, + "loss": 0.8865, + "step": 8543 + }, + { + "epoch": 1.78, + "learning_rate": 1.8332153149968866e-07, + "loss": 0.7746, + "step": 8544 + }, + { + "epoch": 1.78, + "learning_rate": 1.8298319166880096e-07, + "loss": 1.0519, + "step": 8545 + }, + { + "epoch": 1.78, + "learning_rate": 1.8264515452616803e-07, + "loss": 0.7311, + "step": 8546 + }, + { + "epoch": 1.78, + "learning_rate": 1.8230742010811185e-07, + "loss": 0.8401, + "step": 8547 + }, + { + "epoch": 1.78, + "learning_rate": 1.8196998845092237e-07, + "loss": 1.0609, + "step": 8548 + }, + { + "epoch": 1.78, + "learning_rate": 1.816328595908573e-07, + "loss": 0.82, + "step": 8549 + }, + { + "epoch": 1.78, + "learning_rate": 1.8129603356413958e-07, + "loss": 0.9378, + "step": 8550 + }, + { + "epoch": 1.78, + "learning_rate": 1.80959510406962e-07, + "loss": 0.9337, + "step": 8551 + }, + { + "epoch": 1.78, + "learning_rate": 1.8062329015548394e-07, + "loss": 0.9158, + "step": 8552 + }, + { + "epoch": 1.78, + "learning_rate": 1.8028737284583252e-07, + "loss": 1.1609, + "step": 8553 + }, + { + "epoch": 1.78, + "learning_rate": 1.7995175851410084e-07, + "loss": 0.8224, + "step": 8554 + }, + { + "epoch": 1.78, + "learning_rate": 1.7961644719635205e-07, + "loss": 1.0372, + "step": 8555 + }, + { + "epoch": 1.78, + "learning_rate": 1.7928143892861472e-07, + "loss": 0.9045, + "step": 8556 + }, + { + "epoch": 1.78, + "learning_rate": 1.7894673374688563e-07, + "loss": 1.0657, + "step": 8557 + }, + { + "epoch": 1.78, + "learning_rate": 1.786123316871281e-07, + "loss": 0.9859, + "step": 8558 + }, + { + "epoch": 1.78, + "learning_rate": 1.78278232785275e-07, + "loss": 0.962, + "step": 8559 + }, + { + "epoch": 1.78, + "learning_rate": 1.7794443707722396e-07, + "loss": 0.9386, + "step": 8560 + }, + { + "epoch": 1.78, + "learning_rate": 1.776109445988423e-07, + "loss": 0.8462, + "step": 8561 + }, + { + "epoch": 1.78, + "learning_rate": 1.7727775538596202e-07, + "loss": 0.7558, + "step": 8562 + }, + { + "epoch": 1.78, + "learning_rate": 1.769448694743868e-07, + "loss": 0.9324, + "step": 8563 + }, + { + "epoch": 1.78, + "learning_rate": 1.766122868998834e-07, + "loss": 0.8704, + "step": 8564 + }, + { + "epoch": 1.78, + "learning_rate": 1.7628000769818852e-07, + "loss": 1.0899, + "step": 8565 + }, + { + "epoch": 1.78, + "learning_rate": 1.7594803190500464e-07, + "loss": 0.9742, + "step": 8566 + }, + { + "epoch": 1.78, + "learning_rate": 1.7561635955600387e-07, + "loss": 1.0721, + "step": 8567 + }, + { + "epoch": 1.78, + "learning_rate": 1.752849906868237e-07, + "loss": 0.7708, + "step": 8568 + }, + { + "epoch": 1.78, + "learning_rate": 1.7495392533306965e-07, + "loss": 1.0112, + "step": 8569 + }, + { + "epoch": 1.78, + "learning_rate": 1.7462316353031428e-07, + "loss": 0.9187, + "step": 8570 + }, + { + "epoch": 1.78, + "learning_rate": 1.742927053140988e-07, + "loss": 0.6863, + "step": 8571 + }, + { + "epoch": 1.78, + "learning_rate": 1.739625507199305e-07, + "loss": 0.8122, + "step": 8572 + }, + { + "epoch": 1.78, + "learning_rate": 1.7363269978328466e-07, + "loss": 0.9236, + "step": 8573 + }, + { + "epoch": 1.78, + "learning_rate": 1.733031525396026e-07, + "loss": 0.897, + "step": 8574 + }, + { + "epoch": 1.78, + "learning_rate": 1.7297390902429633e-07, + "loss": 1.0524, + "step": 8575 + }, + { + "epoch": 1.78, + "learning_rate": 1.7264496927274087e-07, + "loss": 0.8717, + "step": 8576 + }, + { + "epoch": 1.78, + "learning_rate": 1.7231633332028262e-07, + "loss": 0.824, + "step": 8577 + }, + { + "epoch": 1.78, + "learning_rate": 1.7198800120223136e-07, + "loss": 1.096, + "step": 8578 + }, + { + "epoch": 1.78, + "learning_rate": 1.716599729538685e-07, + "loss": 1.1118, + "step": 8579 + }, + { + "epoch": 1.78, + "learning_rate": 1.713322486104395e-07, + "loss": 1.0268, + "step": 8580 + }, + { + "epoch": 1.78, + "learning_rate": 1.710048282071589e-07, + "loss": 0.8554, + "step": 8581 + }, + { + "epoch": 1.78, + "learning_rate": 1.7067771177920654e-07, + "loss": 0.8209, + "step": 8582 + }, + { + "epoch": 1.79, + "learning_rate": 1.7035089936173298e-07, + "loss": 0.7647, + "step": 8583 + }, + { + "epoch": 1.79, + "learning_rate": 1.7002439098985313e-07, + "loss": 0.9887, + "step": 8584 + }, + { + "epoch": 1.79, + "learning_rate": 1.6969818669865123e-07, + "loss": 0.9583, + "step": 8585 + }, + { + "epoch": 1.79, + "learning_rate": 1.6937228652317593e-07, + "loss": 0.6103, + "step": 8586 + }, + { + "epoch": 1.79, + "learning_rate": 1.6904669049844755e-07, + "loss": 0.7916, + "step": 8587 + }, + { + "epoch": 1.79, + "learning_rate": 1.6872139865944946e-07, + "loss": 1.209, + "step": 8588 + }, + { + "epoch": 1.79, + "learning_rate": 1.68396411041136e-07, + "loss": 1.0049, + "step": 8589 + }, + { + "epoch": 1.79, + "learning_rate": 1.6807172767842494e-07, + "loss": 1.0435, + "step": 8590 + }, + { + "epoch": 1.79, + "learning_rate": 1.6774734860620567e-07, + "loss": 0.9117, + "step": 8591 + }, + { + "epoch": 1.79, + "learning_rate": 1.6742327385933098e-07, + "loss": 0.9153, + "step": 8592 + }, + { + "epoch": 1.79, + "learning_rate": 1.6709950347262403e-07, + "loss": 0.96, + "step": 8593 + }, + { + "epoch": 1.79, + "learning_rate": 1.6677603748087234e-07, + "loss": 0.8912, + "step": 8594 + }, + { + "epoch": 1.79, + "learning_rate": 1.6645287591883406e-07, + "loss": 1.1597, + "step": 8595 + }, + { + "epoch": 1.79, + "learning_rate": 1.661300188212318e-07, + "loss": 0.7291, + "step": 8596 + }, + { + "epoch": 1.79, + "learning_rate": 1.6580746622275743e-07, + "loss": 0.987, + "step": 8597 + }, + { + "epoch": 1.79, + "learning_rate": 1.6548521815806728e-07, + "loss": 0.7126, + "step": 8598 + }, + { + "epoch": 1.79, + "learning_rate": 1.6516327466178927e-07, + "loss": 0.6394, + "step": 8599 + }, + { + "epoch": 1.79, + "learning_rate": 1.648416357685144e-07, + "loss": 0.8386, + "step": 8600 + }, + { + "epoch": 1.79, + "learning_rate": 1.645203015128044e-07, + "loss": 0.8251, + "step": 8601 + }, + { + "epoch": 1.79, + "learning_rate": 1.6419927192918427e-07, + "loss": 0.7738, + "step": 8602 + }, + { + "epoch": 1.79, + "learning_rate": 1.638785470521511e-07, + "loss": 0.7871, + "step": 8603 + }, + { + "epoch": 1.79, + "learning_rate": 1.6355812691616534e-07, + "loss": 0.7759, + "step": 8604 + }, + { + "epoch": 1.79, + "learning_rate": 1.6323801155565677e-07, + "loss": 0.9888, + "step": 8605 + }, + { + "epoch": 1.79, + "learning_rate": 1.6291820100502054e-07, + "loss": 0.919, + "step": 8606 + }, + { + "epoch": 1.79, + "learning_rate": 1.6259869529862214e-07, + "loss": 0.8985, + "step": 8607 + }, + { + "epoch": 1.79, + "learning_rate": 1.6227949447079082e-07, + "loss": 1.0473, + "step": 8608 + }, + { + "epoch": 1.79, + "learning_rate": 1.619605985558258e-07, + "loss": 0.7107, + "step": 8609 + }, + { + "epoch": 1.79, + "learning_rate": 1.616420075879913e-07, + "loss": 0.6823, + "step": 8610 + }, + { + "epoch": 1.79, + "learning_rate": 1.613237216015213e-07, + "loss": 1.059, + "step": 8611 + }, + { + "epoch": 1.79, + "learning_rate": 1.6100574063061414e-07, + "loss": 0.8368, + "step": 8612 + }, + { + "epoch": 1.79, + "learning_rate": 1.606880647094384e-07, + "loss": 0.9022, + "step": 8613 + }, + { + "epoch": 1.79, + "learning_rate": 1.6037069387212656e-07, + "loss": 0.9481, + "step": 8614 + }, + { + "epoch": 1.79, + "learning_rate": 1.6005362815278157e-07, + "loss": 0.7456, + "step": 8615 + }, + { + "epoch": 1.79, + "learning_rate": 1.5973686758547157e-07, + "loss": 0.8968, + "step": 8616 + }, + { + "epoch": 1.79, + "learning_rate": 1.5942041220423263e-07, + "loss": 0.7977, + "step": 8617 + }, + { + "epoch": 1.79, + "learning_rate": 1.5910426204306693e-07, + "loss": 0.993, + "step": 8618 + }, + { + "epoch": 1.79, + "learning_rate": 1.587884171359466e-07, + "loss": 0.9736, + "step": 8619 + }, + { + "epoch": 1.79, + "learning_rate": 1.5847287751680751e-07, + "loss": 0.7948, + "step": 8620 + }, + { + "epoch": 1.79, + "learning_rate": 1.5815764321955484e-07, + "loss": 1.028, + "step": 8621 + }, + { + "epoch": 1.79, + "learning_rate": 1.5784271427806052e-07, + "loss": 1.0659, + "step": 8622 + }, + { + "epoch": 1.79, + "learning_rate": 1.5752809072616448e-07, + "loss": 1.0306, + "step": 8623 + }, + { + "epoch": 1.79, + "learning_rate": 1.5721377259767168e-07, + "loss": 0.9231, + "step": 8624 + }, + { + "epoch": 1.79, + "learning_rate": 1.5689975992635608e-07, + "loss": 0.8913, + "step": 8625 + }, + { + "epoch": 1.79, + "learning_rate": 1.5658605274595873e-07, + "loss": 0.7793, + "step": 8626 + }, + { + "epoch": 1.79, + "learning_rate": 1.562726510901873e-07, + "loss": 0.845, + "step": 8627 + }, + { + "epoch": 1.79, + "learning_rate": 1.5595955499271618e-07, + "loss": 0.7636, + "step": 8628 + }, + { + "epoch": 1.79, + "learning_rate": 1.5564676448718784e-07, + "loss": 0.6148, + "step": 8629 + }, + { + "epoch": 1.79, + "learning_rate": 1.553342796072117e-07, + "loss": 0.8556, + "step": 8630 + }, + { + "epoch": 1.8, + "learning_rate": 1.5502210038636488e-07, + "loss": 0.9265, + "step": 8631 + }, + { + "epoch": 1.8, + "learning_rate": 1.5471022685818958e-07, + "loss": 0.9458, + "step": 8632 + }, + { + "epoch": 1.8, + "learning_rate": 1.5439865905619766e-07, + "loss": 0.7518, + "step": 8633 + }, + { + "epoch": 1.8, + "learning_rate": 1.5408739701386665e-07, + "loss": 1.0239, + "step": 8634 + }, + { + "epoch": 1.8, + "learning_rate": 1.5377644076464215e-07, + "loss": 0.9473, + "step": 8635 + }, + { + "epoch": 1.8, + "learning_rate": 1.5346579034193575e-07, + "loss": 0.7061, + "step": 8636 + }, + { + "epoch": 1.8, + "learning_rate": 1.531554457791271e-07, + "loss": 0.8725, + "step": 8637 + }, + { + "epoch": 1.8, + "learning_rate": 1.528454071095622e-07, + "loss": 0.8304, + "step": 8638 + }, + { + "epoch": 1.8, + "learning_rate": 1.5253567436655601e-07, + "loss": 0.8278, + "step": 8639 + }, + { + "epoch": 1.8, + "learning_rate": 1.5222624758338767e-07, + "loss": 0.721, + "step": 8640 + }, + { + "epoch": 1.8, + "learning_rate": 1.5191712679330615e-07, + "loss": 0.7633, + "step": 8641 + }, + { + "epoch": 1.8, + "learning_rate": 1.5160831202952596e-07, + "loss": 0.9015, + "step": 8642 + }, + { + "epoch": 1.8, + "learning_rate": 1.5129980332522953e-07, + "loss": 0.8693, + "step": 8643 + }, + { + "epoch": 1.8, + "learning_rate": 1.50991600713566e-07, + "loss": 0.8314, + "step": 8644 + }, + { + "epoch": 1.8, + "learning_rate": 1.5068370422765155e-07, + "loss": 0.7139, + "step": 8645 + }, + { + "epoch": 1.8, + "learning_rate": 1.5037611390056938e-07, + "loss": 0.8171, + "step": 8646 + }, + { + "epoch": 1.8, + "learning_rate": 1.500688297653714e-07, + "loss": 0.856, + "step": 8647 + }, + { + "epoch": 1.8, + "learning_rate": 1.4976185185507352e-07, + "loss": 1.0599, + "step": 8648 + }, + { + "epoch": 1.8, + "learning_rate": 1.4945518020266137e-07, + "loss": 0.9084, + "step": 8649 + }, + { + "epoch": 1.8, + "learning_rate": 1.4914881484108656e-07, + "loss": 0.959, + "step": 8650 + }, + { + "epoch": 1.8, + "learning_rate": 1.4884275580326878e-07, + "loss": 0.7219, + "step": 8651 + }, + { + "epoch": 1.8, + "learning_rate": 1.4853700312209306e-07, + "loss": 0.9555, + "step": 8652 + }, + { + "epoch": 1.8, + "learning_rate": 1.482315568304128e-07, + "loss": 0.7575, + "step": 8653 + }, + { + "epoch": 1.8, + "learning_rate": 1.4792641696104803e-07, + "loss": 0.9906, + "step": 8654 + }, + { + "epoch": 1.8, + "learning_rate": 1.476215835467869e-07, + "loss": 0.8127, + "step": 8655 + }, + { + "epoch": 1.8, + "learning_rate": 1.4731705662038253e-07, + "loss": 0.9157, + "step": 8656 + }, + { + "epoch": 1.8, + "learning_rate": 1.4701283621455708e-07, + "loss": 0.9892, + "step": 8657 + }, + { + "epoch": 1.8, + "learning_rate": 1.4670892236199872e-07, + "loss": 0.8128, + "step": 8658 + }, + { + "epoch": 1.8, + "learning_rate": 1.4640531509536336e-07, + "loss": 0.9506, + "step": 8659 + }, + { + "epoch": 1.8, + "learning_rate": 1.4610201444727288e-07, + "loss": 0.9985, + "step": 8660 + }, + { + "epoch": 1.8, + "learning_rate": 1.4579902045031725e-07, + "loss": 0.8248, + "step": 8661 + }, + { + "epoch": 1.8, + "learning_rate": 1.4549633313705336e-07, + "loss": 0.886, + "step": 8662 + }, + { + "epoch": 1.8, + "learning_rate": 1.451939525400049e-07, + "loss": 0.8659, + "step": 8663 + }, + { + "epoch": 1.8, + "learning_rate": 1.4489187869166255e-07, + "loss": 0.9402, + "step": 8664 + }, + { + "epoch": 1.8, + "learning_rate": 1.4459011162448365e-07, + "loss": 1.0588, + "step": 8665 + }, + { + "epoch": 1.8, + "learning_rate": 1.4428865137089363e-07, + "loss": 0.8897, + "step": 8666 + }, + { + "epoch": 1.8, + "learning_rate": 1.4398749796328492e-07, + "loss": 0.9975, + "step": 8667 + }, + { + "epoch": 1.8, + "learning_rate": 1.4368665143401493e-07, + "loss": 1.0206, + "step": 8668 + }, + { + "epoch": 1.8, + "learning_rate": 1.4338611181541084e-07, + "loss": 0.8947, + "step": 8669 + }, + { + "epoch": 1.8, + "learning_rate": 1.4308587913976512e-07, + "loss": 0.9003, + "step": 8670 + }, + { + "epoch": 1.8, + "learning_rate": 1.4278595343933797e-07, + "loss": 0.8393, + "step": 8671 + }, + { + "epoch": 1.8, + "learning_rate": 1.4248633474635598e-07, + "loss": 0.9458, + "step": 8672 + }, + { + "epoch": 1.8, + "learning_rate": 1.4218702309301335e-07, + "loss": 0.7382, + "step": 8673 + }, + { + "epoch": 1.8, + "learning_rate": 1.4188801851147136e-07, + "loss": 0.738, + "step": 8674 + }, + { + "epoch": 1.8, + "learning_rate": 1.41589321033858e-07, + "loss": 0.9004, + "step": 8675 + }, + { + "epoch": 1.8, + "learning_rate": 1.4129093069226762e-07, + "loss": 0.8197, + "step": 8676 + }, + { + "epoch": 1.8, + "learning_rate": 1.4099284751876318e-07, + "loss": 1.1169, + "step": 8677 + }, + { + "epoch": 1.8, + "learning_rate": 1.4069507154537275e-07, + "loss": 0.7801, + "step": 8678 + }, + { + "epoch": 1.81, + "learning_rate": 1.4039760280409375e-07, + "loss": 0.9528, + "step": 8679 + }, + { + "epoch": 1.81, + "learning_rate": 1.401004413268876e-07, + "loss": 0.9906, + "step": 8680 + }, + { + "epoch": 1.81, + "learning_rate": 1.3980358714568475e-07, + "loss": 0.7829, + "step": 8681 + }, + { + "epoch": 1.81, + "learning_rate": 1.3950704029238272e-07, + "loss": 1.1169, + "step": 8682 + }, + { + "epoch": 1.81, + "learning_rate": 1.392108007988453e-07, + "loss": 0.7517, + "step": 8683 + }, + { + "epoch": 1.81, + "learning_rate": 1.3891486869690308e-07, + "loss": 0.9378, + "step": 8684 + }, + { + "epoch": 1.81, + "learning_rate": 1.3861924401835358e-07, + "loss": 1.1041, + "step": 8685 + }, + { + "epoch": 1.81, + "learning_rate": 1.3832392679496243e-07, + "loss": 0.8494, + "step": 8686 + }, + { + "epoch": 1.81, + "learning_rate": 1.3802891705846154e-07, + "loss": 0.7902, + "step": 8687 + }, + { + "epoch": 1.81, + "learning_rate": 1.3773421484054893e-07, + "loss": 0.9179, + "step": 8688 + }, + { + "epoch": 1.81, + "learning_rate": 1.374398201728906e-07, + "loss": 0.8913, + "step": 8689 + }, + { + "epoch": 1.81, + "learning_rate": 1.371457330871192e-07, + "loss": 0.8196, + "step": 8690 + }, + { + "epoch": 1.81, + "learning_rate": 1.3685195361483515e-07, + "loss": 0.8807, + "step": 8691 + }, + { + "epoch": 1.81, + "learning_rate": 1.3655848178760355e-07, + "loss": 0.8562, + "step": 8692 + }, + { + "epoch": 1.81, + "learning_rate": 1.3626531763695916e-07, + "loss": 0.8431, + "step": 8693 + }, + { + "epoch": 1.81, + "learning_rate": 1.3597246119440176e-07, + "loss": 1.0611, + "step": 8694 + }, + { + "epoch": 1.81, + "learning_rate": 1.3567991249139987e-07, + "loss": 0.6829, + "step": 8695 + }, + { + "epoch": 1.81, + "learning_rate": 1.3538767155938603e-07, + "loss": 0.8018, + "step": 8696 + }, + { + "epoch": 1.81, + "learning_rate": 1.3509573842976274e-07, + "loss": 0.9725, + "step": 8697 + }, + { + "epoch": 1.81, + "learning_rate": 1.3480411313389794e-07, + "loss": 0.9501, + "step": 8698 + }, + { + "epoch": 1.81, + "learning_rate": 1.3451279570312724e-07, + "loss": 0.8841, + "step": 8699 + }, + { + "epoch": 1.81, + "learning_rate": 1.3422178616875158e-07, + "loss": 0.9094, + "step": 8700 + }, + { + "epoch": 1.81, + "learning_rate": 1.339310845620406e-07, + "loss": 0.8765, + "step": 8701 + }, + { + "epoch": 1.81, + "learning_rate": 1.3364069091423004e-07, + "loss": 0.9552, + "step": 8702 + }, + { + "epoch": 1.81, + "learning_rate": 1.333506052565232e-07, + "loss": 0.7815, + "step": 8703 + }, + { + "epoch": 1.81, + "learning_rate": 1.3306082762008853e-07, + "loss": 0.7942, + "step": 8704 + }, + { + "epoch": 1.81, + "learning_rate": 1.3277135803606377e-07, + "loss": 0.8211, + "step": 8705 + }, + { + "epoch": 1.81, + "learning_rate": 1.3248219653555173e-07, + "loss": 0.9814, + "step": 8706 + }, + { + "epoch": 1.81, + "learning_rate": 1.321933431496235e-07, + "loss": 0.8976, + "step": 8707 + }, + { + "epoch": 1.81, + "learning_rate": 1.3190479790931564e-07, + "loss": 1.0227, + "step": 8708 + }, + { + "epoch": 1.81, + "learning_rate": 1.3161656084563235e-07, + "loss": 1.0066, + "step": 8709 + }, + { + "epoch": 1.81, + "learning_rate": 1.3132863198954514e-07, + "loss": 0.8993, + "step": 8710 + }, + { + "epoch": 1.81, + "learning_rate": 1.31041011371992e-07, + "loss": 0.8675, + "step": 8711 + }, + { + "epoch": 1.81, + "learning_rate": 1.307536990238768e-07, + "loss": 0.8188, + "step": 8712 + }, + { + "epoch": 1.81, + "learning_rate": 1.3046669497607222e-07, + "loss": 0.986, + "step": 8713 + }, + { + "epoch": 1.81, + "learning_rate": 1.3017999925941659e-07, + "loss": 0.9272, + "step": 8714 + }, + { + "epoch": 1.81, + "learning_rate": 1.2989361190471526e-07, + "loss": 0.9961, + "step": 8715 + }, + { + "epoch": 1.81, + "learning_rate": 1.2960753294274063e-07, + "loss": 0.7278, + "step": 8716 + }, + { + "epoch": 1.81, + "learning_rate": 1.2932176240423145e-07, + "loss": 0.807, + "step": 8717 + }, + { + "epoch": 1.81, + "learning_rate": 1.290363003198941e-07, + "loss": 0.945, + "step": 8718 + }, + { + "epoch": 1.81, + "learning_rate": 1.2875114672040177e-07, + "loss": 0.9455, + "step": 8719 + }, + { + "epoch": 1.81, + "learning_rate": 1.284663016363936e-07, + "loss": 1.0067, + "step": 8720 + }, + { + "epoch": 1.81, + "learning_rate": 1.2818176509847613e-07, + "loss": 0.7686, + "step": 8721 + }, + { + "epoch": 1.81, + "learning_rate": 1.278975371372232e-07, + "loss": 1.0013, + "step": 8722 + }, + { + "epoch": 1.81, + "learning_rate": 1.2761361778317538e-07, + "loss": 0.9014, + "step": 8723 + }, + { + "epoch": 1.81, + "learning_rate": 1.2733000706683896e-07, + "loss": 0.6, + "step": 8724 + }, + { + "epoch": 1.81, + "learning_rate": 1.2704670501868854e-07, + "loss": 0.8149, + "step": 8725 + }, + { + "epoch": 1.81, + "learning_rate": 1.2676371166916412e-07, + "loss": 0.9656, + "step": 8726 + }, + { + "epoch": 1.82, + "learning_rate": 1.264810270486747e-07, + "loss": 0.8104, + "step": 8727 + }, + { + "epoch": 1.82, + "learning_rate": 1.2619865118759332e-07, + "loss": 0.6914, + "step": 8728 + }, + { + "epoch": 1.82, + "learning_rate": 1.2591658411626173e-07, + "loss": 0.9976, + "step": 8729 + }, + { + "epoch": 1.82, + "learning_rate": 1.256348258649883e-07, + "loss": 0.9094, + "step": 8730 + }, + { + "epoch": 1.82, + "learning_rate": 1.2535337646404789e-07, + "loss": 0.8177, + "step": 8731 + }, + { + "epoch": 1.82, + "learning_rate": 1.250722359436819e-07, + "loss": 0.908, + "step": 8732 + }, + { + "epoch": 1.82, + "learning_rate": 1.2479140433409886e-07, + "loss": 0.8359, + "step": 8733 + }, + { + "epoch": 1.82, + "learning_rate": 1.2451088166547396e-07, + "loss": 0.8347, + "step": 8734 + }, + { + "epoch": 1.82, + "learning_rate": 1.2423066796795045e-07, + "loss": 1.1402, + "step": 8735 + }, + { + "epoch": 1.82, + "learning_rate": 1.239507632716359e-07, + "loss": 0.9373, + "step": 8736 + }, + { + "epoch": 1.82, + "learning_rate": 1.2367116760660658e-07, + "loss": 0.9795, + "step": 8737 + }, + { + "epoch": 1.82, + "learning_rate": 1.233918810029051e-07, + "loss": 1.045, + "step": 8738 + }, + { + "epoch": 1.82, + "learning_rate": 1.2311290349054115e-07, + "loss": 0.9265, + "step": 8739 + }, + { + "epoch": 1.82, + "learning_rate": 1.2283423509949009e-07, + "loss": 0.8812, + "step": 8740 + }, + { + "epoch": 1.82, + "learning_rate": 1.2255587585969498e-07, + "loss": 0.9008, + "step": 8741 + }, + { + "epoch": 1.82, + "learning_rate": 1.2227782580106582e-07, + "loss": 0.8863, + "step": 8742 + }, + { + "epoch": 1.82, + "learning_rate": 1.2200008495347947e-07, + "loss": 0.8548, + "step": 8743 + }, + { + "epoch": 1.82, + "learning_rate": 1.21722653346778e-07, + "loss": 0.8163, + "step": 8744 + }, + { + "epoch": 1.82, + "learning_rate": 1.2144553101077194e-07, + "loss": 0.9187, + "step": 8745 + }, + { + "epoch": 1.82, + "learning_rate": 1.2116871797523842e-07, + "loss": 0.8088, + "step": 8746 + }, + { + "epoch": 1.82, + "learning_rate": 1.2089221426992103e-07, + "loss": 0.8712, + "step": 8747 + }, + { + "epoch": 1.82, + "learning_rate": 1.206160199245293e-07, + "loss": 0.8527, + "step": 8748 + }, + { + "epoch": 1.82, + "learning_rate": 1.2034013496874085e-07, + "loss": 1.0134, + "step": 8749 + }, + { + "epoch": 1.82, + "learning_rate": 1.2006455943219962e-07, + "loss": 0.9545, + "step": 8750 + }, + { + "epoch": 1.82, + "learning_rate": 1.197892933445156e-07, + "loss": 0.7825, + "step": 8751 + }, + { + "epoch": 1.82, + "learning_rate": 1.1951433673526679e-07, + "loss": 0.7066, + "step": 8752 + }, + { + "epoch": 1.82, + "learning_rate": 1.192396896339969e-07, + "loss": 0.9271, + "step": 8753 + }, + { + "epoch": 1.82, + "learning_rate": 1.1896535207021697e-07, + "loss": 0.8163, + "step": 8754 + }, + { + "epoch": 1.82, + "learning_rate": 1.1869132407340377e-07, + "loss": 0.8796, + "step": 8755 + }, + { + "epoch": 1.82, + "learning_rate": 1.1841760567300275e-07, + "loss": 0.9534, + "step": 8756 + }, + { + "epoch": 1.82, + "learning_rate": 1.1814419689842404e-07, + "loss": 0.9137, + "step": 8757 + }, + { + "epoch": 1.82, + "learning_rate": 1.1787109777904614e-07, + "loss": 1.0394, + "step": 8758 + }, + { + "epoch": 1.82, + "learning_rate": 1.1759830834421259e-07, + "loss": 0.9068, + "step": 8759 + }, + { + "epoch": 1.82, + "learning_rate": 1.1732582862323559e-07, + "loss": 0.801, + "step": 8760 + }, + { + "epoch": 1.82, + "learning_rate": 1.1705365864539241e-07, + "loss": 0.651, + "step": 8761 + }, + { + "epoch": 1.82, + "learning_rate": 1.1678179843992831e-07, + "loss": 1.0716, + "step": 8762 + }, + { + "epoch": 1.82, + "learning_rate": 1.1651024803605326e-07, + "loss": 0.8412, + "step": 8763 + }, + { + "epoch": 1.82, + "learning_rate": 1.1623900746294725e-07, + "loss": 0.8577, + "step": 8764 + }, + { + "epoch": 1.82, + "learning_rate": 1.1596807674975363e-07, + "loss": 0.9258, + "step": 8765 + }, + { + "epoch": 1.82, + "learning_rate": 1.1569745592558446e-07, + "loss": 0.7716, + "step": 8766 + }, + { + "epoch": 1.82, + "learning_rate": 1.1542714501951745e-07, + "loss": 0.9074, + "step": 8767 + }, + { + "epoch": 1.82, + "learning_rate": 1.151571440605984e-07, + "loss": 0.9609, + "step": 8768 + }, + { + "epoch": 1.82, + "learning_rate": 1.148874530778381e-07, + "loss": 0.8188, + "step": 8769 + }, + { + "epoch": 1.82, + "learning_rate": 1.1461807210021568e-07, + "loss": 0.765, + "step": 8770 + }, + { + "epoch": 1.82, + "learning_rate": 1.1434900115667435e-07, + "loss": 0.8972, + "step": 8771 + }, + { + "epoch": 1.82, + "learning_rate": 1.1408024027612796e-07, + "loss": 0.7995, + "step": 8772 + }, + { + "epoch": 1.82, + "learning_rate": 1.1381178948745341e-07, + "loss": 0.8662, + "step": 8773 + }, + { + "epoch": 1.82, + "learning_rate": 1.1354364881949664e-07, + "loss": 0.8001, + "step": 8774 + }, + { + "epoch": 1.83, + "learning_rate": 1.1327581830106792e-07, + "loss": 0.9153, + "step": 8775 + }, + { + "epoch": 1.83, + "learning_rate": 1.1300829796094725e-07, + "loss": 0.8645, + "step": 8776 + }, + { + "epoch": 1.83, + "learning_rate": 1.1274108782787829e-07, + "loss": 0.94, + "step": 8777 + }, + { + "epoch": 1.83, + "learning_rate": 1.124741879305744e-07, + "loss": 0.8206, + "step": 8778 + }, + { + "epoch": 1.83, + "learning_rate": 1.1220759829771165e-07, + "loss": 0.9419, + "step": 8779 + }, + { + "epoch": 1.83, + "learning_rate": 1.1194131895793747e-07, + "loss": 1.0499, + "step": 8780 + }, + { + "epoch": 1.83, + "learning_rate": 1.1167534993986194e-07, + "loss": 0.8817, + "step": 8781 + }, + { + "epoch": 1.83, + "learning_rate": 1.1140969127206457e-07, + "loss": 0.8633, + "step": 8782 + }, + { + "epoch": 1.83, + "learning_rate": 1.1114434298308885e-07, + "loss": 0.9625, + "step": 8783 + }, + { + "epoch": 1.83, + "learning_rate": 1.1087930510144795e-07, + "loss": 0.7988, + "step": 8784 + }, + { + "epoch": 1.83, + "learning_rate": 1.1061457765561944e-07, + "loss": 0.8008, + "step": 8785 + }, + { + "epoch": 1.83, + "learning_rate": 1.1035016067404857e-07, + "loss": 1.0448, + "step": 8786 + }, + { + "epoch": 1.83, + "learning_rate": 1.1008605418514628e-07, + "loss": 0.9878, + "step": 8787 + }, + { + "epoch": 1.83, + "learning_rate": 1.0982225821729153e-07, + "loss": 0.9366, + "step": 8788 + }, + { + "epoch": 1.83, + "learning_rate": 1.0955877279882898e-07, + "loss": 1.0422, + "step": 8789 + }, + { + "epoch": 1.83, + "learning_rate": 1.0929559795806998e-07, + "loss": 0.8562, + "step": 8790 + }, + { + "epoch": 1.83, + "learning_rate": 1.0903273372329225e-07, + "loss": 0.7615, + "step": 8791 + }, + { + "epoch": 1.83, + "learning_rate": 1.0877018012274154e-07, + "loss": 1.0317, + "step": 8792 + }, + { + "epoch": 1.83, + "learning_rate": 1.085079371846286e-07, + "loss": 0.9895, + "step": 8793 + }, + { + "epoch": 1.83, + "learning_rate": 1.0824600493713155e-07, + "loss": 1.067, + "step": 8794 + }, + { + "epoch": 1.83, + "learning_rate": 1.0798438340839423e-07, + "loss": 0.8959, + "step": 8795 + }, + { + "epoch": 1.83, + "learning_rate": 1.0772307262652947e-07, + "loss": 1.2524, + "step": 8796 + }, + { + "epoch": 1.83, + "learning_rate": 1.0746207261961349e-07, + "loss": 1.1436, + "step": 8797 + }, + { + "epoch": 1.83, + "learning_rate": 1.0720138341569186e-07, + "loss": 0.8371, + "step": 8798 + }, + { + "epoch": 1.83, + "learning_rate": 1.0694100504277449e-07, + "loss": 1.0194, + "step": 8799 + }, + { + "epoch": 1.83, + "learning_rate": 1.0668093752884068e-07, + "loss": 0.7493, + "step": 8800 + }, + { + "epoch": 1.83, + "learning_rate": 1.0642118090183272e-07, + "loss": 0.9571, + "step": 8801 + }, + { + "epoch": 1.83, + "learning_rate": 1.0616173518966333e-07, + "loss": 0.8823, + "step": 8802 + }, + { + "epoch": 1.83, + "learning_rate": 1.0590260042020783e-07, + "loss": 0.8216, + "step": 8803 + }, + { + "epoch": 1.83, + "learning_rate": 1.0564377662131231e-07, + "loss": 0.8041, + "step": 8804 + }, + { + "epoch": 1.83, + "learning_rate": 1.0538526382078618e-07, + "loss": 0.7058, + "step": 8805 + }, + { + "epoch": 1.83, + "learning_rate": 1.0512706204640721e-07, + "loss": 0.9056, + "step": 8806 + }, + { + "epoch": 1.83, + "learning_rate": 1.0486917132591822e-07, + "loss": 1.0106, + "step": 8807 + }, + { + "epoch": 1.83, + "learning_rate": 1.0461159168703072e-07, + "loss": 0.8406, + "step": 8808 + }, + { + "epoch": 1.83, + "learning_rate": 1.0435432315742055e-07, + "loss": 1.207, + "step": 8809 + }, + { + "epoch": 1.83, + "learning_rate": 1.0409736576473227e-07, + "loss": 0.939, + "step": 8810 + }, + { + "epoch": 1.83, + "learning_rate": 1.0384071953657481e-07, + "loss": 0.7596, + "step": 8811 + }, + { + "epoch": 1.83, + "learning_rate": 1.035843845005261e-07, + "loss": 0.9864, + "step": 8812 + }, + { + "epoch": 1.83, + "learning_rate": 1.033283606841281e-07, + "loss": 0.9213, + "step": 8813 + }, + { + "epoch": 1.83, + "learning_rate": 1.0307264811489148e-07, + "loss": 0.8678, + "step": 8814 + }, + { + "epoch": 1.83, + "learning_rate": 1.0281724682029125e-07, + "loss": 0.9581, + "step": 8815 + }, + { + "epoch": 1.83, + "learning_rate": 1.0256215682777181e-07, + "loss": 0.7242, + "step": 8816 + }, + { + "epoch": 1.83, + "learning_rate": 1.0230737816474155e-07, + "loss": 0.9333, + "step": 8817 + }, + { + "epoch": 1.83, + "learning_rate": 1.0205291085857693e-07, + "loss": 1.3792, + "step": 8818 + }, + { + "epoch": 1.83, + "learning_rate": 1.0179875493662005e-07, + "loss": 0.9088, + "step": 8819 + }, + { + "epoch": 1.83, + "learning_rate": 1.0154491042618075e-07, + "loss": 0.8225, + "step": 8820 + }, + { + "epoch": 1.83, + "learning_rate": 1.012913773545332e-07, + "loss": 0.9644, + "step": 8821 + }, + { + "epoch": 1.83, + "learning_rate": 1.0103815574892062e-07, + "loss": 0.7724, + "step": 8822 + }, + { + "epoch": 1.84, + "learning_rate": 1.0078524563655123e-07, + "loss": 0.964, + "step": 8823 + }, + { + "epoch": 1.84, + "learning_rate": 1.0053264704460063e-07, + "loss": 0.8344, + "step": 8824 + }, + { + "epoch": 1.84, + "learning_rate": 1.0028036000021012e-07, + "loss": 0.9131, + "step": 8825 + }, + { + "epoch": 1.84, + "learning_rate": 1.0002838453048769e-07, + "loss": 0.9014, + "step": 8826 + }, + { + "epoch": 1.84, + "learning_rate": 9.977672066250864e-08, + "loss": 0.9679, + "step": 8827 + }, + { + "epoch": 1.84, + "learning_rate": 9.952536842331406e-08, + "loss": 0.8236, + "step": 8828 + }, + { + "epoch": 1.84, + "learning_rate": 9.927432783991164e-08, + "loss": 0.7802, + "step": 8829 + }, + { + "epoch": 1.84, + "learning_rate": 9.902359893927548e-08, + "loss": 0.9159, + "step": 8830 + }, + { + "epoch": 1.84, + "learning_rate": 9.87731817483467e-08, + "loss": 1.0462, + "step": 8831 + }, + { + "epoch": 1.84, + "learning_rate": 9.852307629403278e-08, + "loss": 0.7632, + "step": 8832 + }, + { + "epoch": 1.84, + "learning_rate": 9.827328260320689e-08, + "loss": 0.7517, + "step": 8833 + }, + { + "epoch": 1.84, + "learning_rate": 9.802380070270988e-08, + "loss": 0.9895, + "step": 8834 + }, + { + "epoch": 1.84, + "learning_rate": 9.777463061934833e-08, + "loss": 1.0161, + "step": 8835 + }, + { + "epoch": 1.84, + "learning_rate": 9.752577237989613e-08, + "loss": 1.0208, + "step": 8836 + }, + { + "epoch": 1.84, + "learning_rate": 9.72772260110919e-08, + "loss": 0.7952, + "step": 8837 + }, + { + "epoch": 1.84, + "learning_rate": 9.702899153964295e-08, + "loss": 0.9096, + "step": 8838 + }, + { + "epoch": 1.84, + "learning_rate": 9.678106899222161e-08, + "loss": 1.0175, + "step": 8839 + }, + { + "epoch": 1.84, + "learning_rate": 9.653345839546723e-08, + "loss": 0.8577, + "step": 8840 + }, + { + "epoch": 1.84, + "learning_rate": 9.628615977598554e-08, + "loss": 1.0704, + "step": 8841 + }, + { + "epoch": 1.84, + "learning_rate": 9.603917316034893e-08, + "loss": 1.169, + "step": 8842 + }, + { + "epoch": 1.84, + "learning_rate": 9.579249857509553e-08, + "loss": 0.9591, + "step": 8843 + }, + { + "epoch": 1.84, + "learning_rate": 9.554613604673146e-08, + "loss": 0.9598, + "step": 8844 + }, + { + "epoch": 1.84, + "learning_rate": 9.530008560172754e-08, + "loss": 0.9445, + "step": 8845 + }, + { + "epoch": 1.84, + "learning_rate": 9.505434726652196e-08, + "loss": 1.0151, + "step": 8846 + }, + { + "epoch": 1.84, + "learning_rate": 9.480892106751992e-08, + "loss": 0.9573, + "step": 8847 + }, + { + "epoch": 1.84, + "learning_rate": 9.456380703109203e-08, + "loss": 0.8878, + "step": 8848 + }, + { + "epoch": 1.84, + "learning_rate": 9.431900518357516e-08, + "loss": 0.9656, + "step": 8849 + }, + { + "epoch": 1.84, + "learning_rate": 9.4074515551274e-08, + "loss": 0.9191, + "step": 8850 + }, + { + "epoch": 1.84, + "learning_rate": 9.383033816045916e-08, + "loss": 1.0429, + "step": 8851 + }, + { + "epoch": 1.84, + "learning_rate": 9.3586473037367e-08, + "loss": 0.89, + "step": 8852 + }, + { + "epoch": 1.84, + "learning_rate": 9.33429202082009e-08, + "loss": 0.6837, + "step": 8853 + }, + { + "epoch": 1.84, + "learning_rate": 9.309967969913024e-08, + "loss": 0.742, + "step": 8854 + }, + { + "epoch": 1.84, + "learning_rate": 9.285675153629181e-08, + "loss": 0.8368, + "step": 8855 + }, + { + "epoch": 1.84, + "learning_rate": 9.261413574578837e-08, + "loss": 0.9928, + "step": 8856 + }, + { + "epoch": 1.84, + "learning_rate": 9.237183235368807e-08, + "loss": 0.9835, + "step": 8857 + }, + { + "epoch": 1.84, + "learning_rate": 9.212984138602675e-08, + "loss": 0.7858, + "step": 8858 + }, + { + "epoch": 1.84, + "learning_rate": 9.188816286880696e-08, + "loss": 0.8413, + "step": 8859 + }, + { + "epoch": 1.84, + "learning_rate": 9.16467968279966e-08, + "loss": 0.6813, + "step": 8860 + }, + { + "epoch": 1.84, + "learning_rate": 9.140574328952989e-08, + "loss": 0.8608, + "step": 8861 + }, + { + "epoch": 1.84, + "learning_rate": 9.11650022793088e-08, + "loss": 0.8713, + "step": 8862 + }, + { + "epoch": 1.84, + "learning_rate": 9.092457382320063e-08, + "loss": 0.7107, + "step": 8863 + }, + { + "epoch": 1.84, + "learning_rate": 9.068445794703938e-08, + "loss": 0.9134, + "step": 8864 + }, + { + "epoch": 1.84, + "learning_rate": 9.044465467662543e-08, + "loss": 0.8225, + "step": 8865 + }, + { + "epoch": 1.84, + "learning_rate": 9.020516403772583e-08, + "loss": 0.9227, + "step": 8866 + }, + { + "epoch": 1.84, + "learning_rate": 8.996598605607365e-08, + "loss": 0.7421, + "step": 8867 + }, + { + "epoch": 1.84, + "learning_rate": 8.9727120757369e-08, + "loss": 1.0443, + "step": 8868 + }, + { + "epoch": 1.84, + "learning_rate": 8.948856816727701e-08, + "loss": 0.9589, + "step": 8869 + }, + { + "epoch": 1.84, + "learning_rate": 8.925032831143087e-08, + "loss": 0.8965, + "step": 8870 + }, + { + "epoch": 1.85, + "learning_rate": 8.901240121542908e-08, + "loss": 0.7594, + "step": 8871 + }, + { + "epoch": 1.85, + "learning_rate": 8.877478690483754e-08, + "loss": 0.8825, + "step": 8872 + }, + { + "epoch": 1.85, + "learning_rate": 8.853748540518714e-08, + "loss": 0.9479, + "step": 8873 + }, + { + "epoch": 1.85, + "learning_rate": 8.830049674197615e-08, + "loss": 0.9289, + "step": 8874 + }, + { + "epoch": 1.85, + "learning_rate": 8.806382094066889e-08, + "loss": 0.9374, + "step": 8875 + }, + { + "epoch": 1.85, + "learning_rate": 8.782745802669667e-08, + "loss": 0.8648, + "step": 8876 + }, + { + "epoch": 1.85, + "learning_rate": 8.759140802545585e-08, + "loss": 0.8144, + "step": 8877 + }, + { + "epoch": 1.85, + "learning_rate": 8.735567096231045e-08, + "loss": 0.962, + "step": 8878 + }, + { + "epoch": 1.85, + "learning_rate": 8.712024686259057e-08, + "loss": 1.2144, + "step": 8879 + }, + { + "epoch": 1.85, + "learning_rate": 8.688513575159229e-08, + "loss": 0.8417, + "step": 8880 + }, + { + "epoch": 1.85, + "learning_rate": 8.665033765457808e-08, + "loss": 0.905, + "step": 8881 + }, + { + "epoch": 1.85, + "learning_rate": 8.641585259677742e-08, + "loss": 0.9808, + "step": 8882 + }, + { + "epoch": 1.85, + "learning_rate": 8.618168060338517e-08, + "loss": 0.8279, + "step": 8883 + }, + { + "epoch": 1.85, + "learning_rate": 8.594782169956416e-08, + "loss": 0.8421, + "step": 8884 + }, + { + "epoch": 1.85, + "learning_rate": 8.571427591044067e-08, + "loss": 1.055, + "step": 8885 + }, + { + "epoch": 1.85, + "learning_rate": 8.548104326111128e-08, + "loss": 0.8414, + "step": 8886 + }, + { + "epoch": 1.85, + "learning_rate": 8.524812377663526e-08, + "loss": 1.2104, + "step": 8887 + }, + { + "epoch": 1.85, + "learning_rate": 8.501551748204062e-08, + "loss": 0.8782, + "step": 8888 + }, + { + "epoch": 1.85, + "learning_rate": 8.478322440232034e-08, + "loss": 1.0471, + "step": 8889 + }, + { + "epoch": 1.85, + "learning_rate": 8.45512445624348e-08, + "loss": 0.7771, + "step": 8890 + }, + { + "epoch": 1.85, + "learning_rate": 8.431957798730972e-08, + "loss": 0.9645, + "step": 8891 + }, + { + "epoch": 1.85, + "learning_rate": 8.408822470183853e-08, + "loss": 0.8644, + "step": 8892 + }, + { + "epoch": 1.85, + "learning_rate": 8.385718473087867e-08, + "loss": 1.0227, + "step": 8893 + }, + { + "epoch": 1.85, + "learning_rate": 8.362645809925695e-08, + "loss": 0.7503, + "step": 8894 + }, + { + "epoch": 1.85, + "learning_rate": 8.339604483176388e-08, + "loss": 0.9093, + "step": 8895 + }, + { + "epoch": 1.85, + "learning_rate": 8.3165944953158e-08, + "loss": 0.8579, + "step": 8896 + }, + { + "epoch": 1.85, + "learning_rate": 8.293615848816249e-08, + "loss": 0.902, + "step": 8897 + }, + { + "epoch": 1.85, + "learning_rate": 8.270668546146897e-08, + "loss": 1.0601, + "step": 8898 + }, + { + "epoch": 1.85, + "learning_rate": 8.247752589773405e-08, + "loss": 0.9935, + "step": 8899 + }, + { + "epoch": 1.85, + "learning_rate": 8.224867982158102e-08, + "loss": 0.8943, + "step": 8900 + }, + { + "epoch": 1.85, + "learning_rate": 8.202014725759821e-08, + "loss": 0.7451, + "step": 8901 + }, + { + "epoch": 1.85, + "learning_rate": 8.179192823034332e-08, + "loss": 0.795, + "step": 8902 + }, + { + "epoch": 1.85, + "learning_rate": 8.156402276433706e-08, + "loss": 0.8263, + "step": 8903 + }, + { + "epoch": 1.85, + "learning_rate": 8.13364308840685e-08, + "loss": 0.778, + "step": 8904 + }, + { + "epoch": 1.85, + "learning_rate": 8.110915261399176e-08, + "loss": 1.0232, + "step": 8905 + }, + { + "epoch": 1.85, + "learning_rate": 8.088218797852864e-08, + "loss": 0.7922, + "step": 8906 + }, + { + "epoch": 1.85, + "learning_rate": 8.065553700206596e-08, + "loss": 0.8218, + "step": 8907 + }, + { + "epoch": 1.85, + "learning_rate": 8.04291997089579e-08, + "loss": 0.9154, + "step": 8908 + }, + { + "epoch": 1.85, + "learning_rate": 8.020317612352334e-08, + "loss": 0.772, + "step": 8909 + }, + { + "epoch": 1.85, + "learning_rate": 7.997746627004986e-08, + "loss": 0.8498, + "step": 8910 + }, + { + "epoch": 1.85, + "learning_rate": 7.975207017278874e-08, + "loss": 0.86, + "step": 8911 + }, + { + "epoch": 1.85, + "learning_rate": 7.952698785595958e-08, + "loss": 0.9616, + "step": 8912 + }, + { + "epoch": 1.85, + "learning_rate": 7.93022193437467e-08, + "loss": 0.7272, + "step": 8913 + }, + { + "epoch": 1.85, + "learning_rate": 7.907776466030248e-08, + "loss": 1.0239, + "step": 8914 + }, + { + "epoch": 1.85, + "learning_rate": 7.885362382974393e-08, + "loss": 0.9132, + "step": 8915 + }, + { + "epoch": 1.85, + "learning_rate": 7.862979687615513e-08, + "loss": 0.9538, + "step": 8916 + }, + { + "epoch": 1.85, + "learning_rate": 7.84062838235855e-08, + "loss": 0.9429, + "step": 8917 + }, + { + "epoch": 1.85, + "learning_rate": 7.818308469605284e-08, + "loss": 0.8057, + "step": 8918 + }, + { + "epoch": 1.86, + "learning_rate": 7.79601995175393e-08, + "loss": 0.7427, + "step": 8919 + }, + { + "epoch": 1.86, + "learning_rate": 7.773762831199371e-08, + "loss": 1.1314, + "step": 8920 + }, + { + "epoch": 1.86, + "learning_rate": 7.751537110333095e-08, + "loss": 0.7672, + "step": 8921 + }, + { + "epoch": 1.86, + "learning_rate": 7.729342791543359e-08, + "loss": 1.1193, + "step": 8922 + }, + { + "epoch": 1.86, + "learning_rate": 7.707179877214854e-08, + "loss": 0.7619, + "step": 8923 + }, + { + "epoch": 1.86, + "learning_rate": 7.685048369729042e-08, + "loss": 0.8398, + "step": 8924 + }, + { + "epoch": 1.86, + "learning_rate": 7.662948271463887e-08, + "loss": 0.7717, + "step": 8925 + }, + { + "epoch": 1.86, + "learning_rate": 7.640879584794125e-08, + "loss": 1.0374, + "step": 8926 + }, + { + "epoch": 1.86, + "learning_rate": 7.618842312090957e-08, + "loss": 0.805, + "step": 8927 + }, + { + "epoch": 1.86, + "learning_rate": 7.596836455722322e-08, + "loss": 0.982, + "step": 8928 + }, + { + "epoch": 1.86, + "learning_rate": 7.574862018052731e-08, + "loss": 0.9168, + "step": 8929 + }, + { + "epoch": 1.86, + "learning_rate": 7.552919001443359e-08, + "loss": 0.9428, + "step": 8930 + }, + { + "epoch": 1.86, + "learning_rate": 7.531007408251989e-08, + "loss": 0.9952, + "step": 8931 + }, + { + "epoch": 1.86, + "learning_rate": 7.509127240833002e-08, + "loss": 0.902, + "step": 8932 + }, + { + "epoch": 1.86, + "learning_rate": 7.487278501537387e-08, + "loss": 0.9845, + "step": 8933 + }, + { + "epoch": 1.86, + "learning_rate": 7.465461192712864e-08, + "loss": 0.8084, + "step": 8934 + }, + { + "epoch": 1.86, + "learning_rate": 7.443675316703624e-08, + "loss": 0.775, + "step": 8935 + }, + { + "epoch": 1.86, + "learning_rate": 7.421920875850663e-08, + "loss": 0.9385, + "step": 8936 + }, + { + "epoch": 1.86, + "learning_rate": 7.400197872491343e-08, + "loss": 0.9315, + "step": 8937 + }, + { + "epoch": 1.86, + "learning_rate": 7.378506308959932e-08, + "loss": 0.7924, + "step": 8938 + }, + { + "epoch": 1.86, + "learning_rate": 7.356846187587096e-08, + "loss": 0.9144, + "step": 8939 + }, + { + "epoch": 1.86, + "learning_rate": 7.33521751070031e-08, + "loss": 0.9805, + "step": 8940 + }, + { + "epoch": 1.86, + "learning_rate": 7.313620280623479e-08, + "loss": 0.7276, + "step": 8941 + }, + { + "epoch": 1.86, + "learning_rate": 7.29205449967728e-08, + "loss": 0.9552, + "step": 8942 + }, + { + "epoch": 1.86, + "learning_rate": 7.27052017017893e-08, + "loss": 0.8099, + "step": 8943 + }, + { + "epoch": 1.86, + "learning_rate": 7.249017294442306e-08, + "loss": 0.8926, + "step": 8944 + }, + { + "epoch": 1.86, + "learning_rate": 7.227545874777863e-08, + "loss": 1.0869, + "step": 8945 + }, + { + "epoch": 1.86, + "learning_rate": 7.206105913492756e-08, + "loss": 0.6811, + "step": 8946 + }, + { + "epoch": 1.86, + "learning_rate": 7.184697412890639e-08, + "loss": 1.0924, + "step": 8947 + }, + { + "epoch": 1.86, + "learning_rate": 7.163320375271943e-08, + "loss": 0.9437, + "step": 8948 + }, + { + "epoch": 1.86, + "learning_rate": 7.141974802933493e-08, + "loss": 0.8825, + "step": 8949 + }, + { + "epoch": 1.86, + "learning_rate": 7.120660698169023e-08, + "loss": 0.9821, + "step": 8950 + }, + { + "epoch": 1.86, + "learning_rate": 7.099378063268635e-08, + "loss": 1.0697, + "step": 8951 + }, + { + "epoch": 1.86, + "learning_rate": 7.0781269005192e-08, + "loss": 1.0297, + "step": 8952 + }, + { + "epoch": 1.86, + "learning_rate": 7.056907212204089e-08, + "loss": 1.025, + "step": 8953 + }, + { + "epoch": 1.86, + "learning_rate": 7.035719000603413e-08, + "loss": 0.7977, + "step": 8954 + }, + { + "epoch": 1.86, + "learning_rate": 7.014562267993818e-08, + "loss": 1.2075, + "step": 8955 + }, + { + "epoch": 1.86, + "learning_rate": 6.993437016648618e-08, + "loss": 0.7931, + "step": 8956 + }, + { + "epoch": 1.86, + "learning_rate": 6.972343248837665e-08, + "loss": 1.104, + "step": 8957 + }, + { + "epoch": 1.86, + "learning_rate": 6.951280966827578e-08, + "loss": 0.8299, + "step": 8958 + }, + { + "epoch": 1.86, + "learning_rate": 6.930250172881413e-08, + "loss": 1.0534, + "step": 8959 + }, + { + "epoch": 1.86, + "learning_rate": 6.909250869258931e-08, + "loss": 0.7543, + "step": 8960 + }, + { + "epoch": 1.86, + "learning_rate": 6.888283058216527e-08, + "loss": 0.8495, + "step": 8961 + }, + { + "epoch": 1.86, + "learning_rate": 6.86734674200723e-08, + "loss": 0.711, + "step": 8962 + }, + { + "epoch": 1.86, + "learning_rate": 6.846441922880575e-08, + "loss": 0.8729, + "step": 8963 + }, + { + "epoch": 1.86, + "learning_rate": 6.825568603082832e-08, + "loss": 0.8515, + "step": 8964 + }, + { + "epoch": 1.86, + "learning_rate": 6.804726784856808e-08, + "loss": 0.9051, + "step": 8965 + }, + { + "epoch": 1.86, + "learning_rate": 6.783916470442009e-08, + "loss": 0.967, + "step": 8966 + }, + { + "epoch": 1.87, + "learning_rate": 6.763137662074447e-08, + "loss": 1.0997, + "step": 8967 + }, + { + "epoch": 1.87, + "learning_rate": 6.742390361986772e-08, + "loss": 1.0068, + "step": 8968 + }, + { + "epoch": 1.87, + "learning_rate": 6.721674572408365e-08, + "loss": 0.7651, + "step": 8969 + }, + { + "epoch": 1.87, + "learning_rate": 6.700990295565112e-08, + "loss": 0.9311, + "step": 8970 + }, + { + "epoch": 1.87, + "learning_rate": 6.680337533679503e-08, + "loss": 0.91, + "step": 8971 + }, + { + "epoch": 1.87, + "learning_rate": 6.659716288970696e-08, + "loss": 1.1007, + "step": 8972 + }, + { + "epoch": 1.87, + "learning_rate": 6.639126563654419e-08, + "loss": 1.0034, + "step": 8973 + }, + { + "epoch": 1.87, + "learning_rate": 6.6185683599431e-08, + "loss": 0.8243, + "step": 8974 + }, + { + "epoch": 1.87, + "learning_rate": 6.59804168004564e-08, + "loss": 1.0651, + "step": 8975 + }, + { + "epoch": 1.87, + "learning_rate": 6.577546526167677e-08, + "loss": 0.7798, + "step": 8976 + }, + { + "epoch": 1.87, + "learning_rate": 6.557082900511413e-08, + "loss": 1.1291, + "step": 8977 + }, + { + "epoch": 1.87, + "learning_rate": 6.536650805275657e-08, + "loss": 0.8984, + "step": 8978 + }, + { + "epoch": 1.87, + "learning_rate": 6.516250242655819e-08, + "loss": 0.7518, + "step": 8979 + }, + { + "epoch": 1.87, + "learning_rate": 6.495881214843947e-08, + "loss": 0.9959, + "step": 8980 + }, + { + "epoch": 1.87, + "learning_rate": 6.475543724028687e-08, + "loss": 1.0238, + "step": 8981 + }, + { + "epoch": 1.87, + "learning_rate": 6.455237772395328e-08, + "loss": 0.9584, + "step": 8982 + }, + { + "epoch": 1.87, + "learning_rate": 6.43496336212569e-08, + "loss": 0.9091, + "step": 8983 + }, + { + "epoch": 1.87, + "learning_rate": 6.41472049539833e-08, + "loss": 0.8959, + "step": 8984 + }, + { + "epoch": 1.87, + "learning_rate": 6.394509174388274e-08, + "loss": 0.8909, + "step": 8985 + }, + { + "epoch": 1.87, + "learning_rate": 6.374329401267287e-08, + "loss": 0.8105, + "step": 8986 + }, + { + "epoch": 1.87, + "learning_rate": 6.354181178203633e-08, + "loss": 0.8629, + "step": 8987 + }, + { + "epoch": 1.87, + "learning_rate": 6.334064507362247e-08, + "loss": 0.8369, + "step": 8988 + }, + { + "epoch": 1.87, + "learning_rate": 6.313979390904668e-08, + "loss": 0.7838, + "step": 8989 + }, + { + "epoch": 1.87, + "learning_rate": 6.2939258309891e-08, + "loss": 0.758, + "step": 8990 + }, + { + "epoch": 1.87, + "learning_rate": 6.273903829770189e-08, + "loss": 0.6805, + "step": 8991 + }, + { + "epoch": 1.87, + "learning_rate": 6.25391338939938e-08, + "loss": 0.9571, + "step": 8992 + }, + { + "epoch": 1.87, + "learning_rate": 6.233954512024586e-08, + "loss": 1.2119, + "step": 8993 + }, + { + "epoch": 1.87, + "learning_rate": 6.214027199790462e-08, + "loss": 0.8437, + "step": 8994 + }, + { + "epoch": 1.87, + "learning_rate": 6.194131454838125e-08, + "loss": 0.9279, + "step": 8995 + }, + { + "epoch": 1.87, + "learning_rate": 6.174267279305368e-08, + "loss": 0.8195, + "step": 8996 + }, + { + "epoch": 1.87, + "learning_rate": 6.15443467532668e-08, + "loss": 0.6663, + "step": 8997 + }, + { + "epoch": 1.87, + "learning_rate": 6.134633645032994e-08, + "loss": 0.9441, + "step": 8998 + }, + { + "epoch": 1.87, + "learning_rate": 6.114864190551939e-08, + "loss": 1.0173, + "step": 8999 + }, + { + "epoch": 1.87, + "learning_rate": 6.095126314007781e-08, + "loss": 0.8508, + "step": 9000 + }, + { + "epoch": 1.87, + "eval_loss": NaN, + "eval_runtime": 15.0106, + "eval_samples_per_second": 352.75, + "eval_steps_per_second": 44.102, + "step": 9000 + }, + { + "epoch": 1.87, + "learning_rate": 6.075420017521293e-08, + "loss": 0.8437, + "step": 9001 + }, + { + "epoch": 1.87, + "learning_rate": 6.055745303210015e-08, + "loss": 0.8408, + "step": 9002 + }, + { + "epoch": 1.87, + "learning_rate": 6.036102173187885e-08, + "loss": 0.8385, + "step": 9003 + }, + { + "epoch": 1.87, + "learning_rate": 6.016490629565586e-08, + "loss": 0.826, + "step": 9004 + }, + { + "epoch": 1.87, + "learning_rate": 5.996910674450395e-08, + "loss": 1.0228, + "step": 9005 + }, + { + "epoch": 1.87, + "learning_rate": 5.9773623099462e-08, + "loss": 0.9783, + "step": 9006 + }, + { + "epoch": 1.87, + "learning_rate": 5.957845538153417e-08, + "loss": 0.8994, + "step": 9007 + }, + { + "epoch": 1.87, + "learning_rate": 5.9383603611691705e-08, + "loss": 0.8814, + "step": 9008 + }, + { + "epoch": 1.87, + "learning_rate": 5.9189067810870854e-08, + "loss": 0.8656, + "step": 9009 + }, + { + "epoch": 1.87, + "learning_rate": 5.8994847999975235e-08, + "loss": 0.7666, + "step": 9010 + }, + { + "epoch": 1.87, + "learning_rate": 5.880094419987314e-08, + "loss": 1.0052, + "step": 9011 + }, + { + "epoch": 1.87, + "learning_rate": 5.860735643139958e-08, + "loss": 0.7893, + "step": 9012 + }, + { + "epoch": 1.87, + "learning_rate": 5.841408471535592e-08, + "loss": 1.0014, + "step": 9013 + }, + { + "epoch": 1.87, + "learning_rate": 5.822112907250887e-08, + "loss": 0.8081, + "step": 9014 + }, + { + "epoch": 1.88, + "learning_rate": 5.802848952359152e-08, + "loss": 1.1453, + "step": 9015 + }, + { + "epoch": 1.88, + "learning_rate": 5.783616608930298e-08, + "loss": 1.0877, + "step": 9016 + }, + { + "epoch": 1.88, + "learning_rate": 5.7644158790308706e-08, + "loss": 0.728, + "step": 9017 + }, + { + "epoch": 1.88, + "learning_rate": 5.745246764723955e-08, + "loss": 0.9963, + "step": 9018 + }, + { + "epoch": 1.88, + "learning_rate": 5.7261092680692685e-08, + "loss": 0.7619, + "step": 9019 + }, + { + "epoch": 1.88, + "learning_rate": 5.7070033911231336e-08, + "loss": 0.8087, + "step": 9020 + }, + { + "epoch": 1.88, + "learning_rate": 5.6879291359385074e-08, + "loss": 1.1866, + "step": 9021 + }, + { + "epoch": 1.88, + "learning_rate": 5.668886504564852e-08, + "loss": 0.7834, + "step": 9022 + }, + { + "epoch": 1.88, + "learning_rate": 5.6498754990483624e-08, + "loss": 0.8715, + "step": 9023 + }, + { + "epoch": 1.88, + "learning_rate": 5.630896121431739e-08, + "loss": 1.0776, + "step": 9024 + }, + { + "epoch": 1.88, + "learning_rate": 5.6119483737543496e-08, + "loss": 0.8734, + "step": 9025 + }, + { + "epoch": 1.88, + "learning_rate": 5.593032258052066e-08, + "loss": 0.7871, + "step": 9026 + }, + { + "epoch": 1.88, + "learning_rate": 5.574147776357463e-08, + "loss": 1.0677, + "step": 9027 + }, + { + "epoch": 1.88, + "learning_rate": 5.5552949306996836e-08, + "loss": 0.9138, + "step": 9028 + }, + { + "epoch": 1.88, + "learning_rate": 5.536473723104474e-08, + "loss": 0.6629, + "step": 9029 + }, + { + "epoch": 1.88, + "learning_rate": 5.517684155594116e-08, + "loss": 0.9081, + "step": 9030 + }, + { + "epoch": 1.88, + "learning_rate": 5.498926230187595e-08, + "loss": 0.8881, + "step": 9031 + }, + { + "epoch": 1.88, + "learning_rate": 5.480199948900433e-08, + "loss": 1.1874, + "step": 9032 + }, + { + "epoch": 1.88, + "learning_rate": 5.4615053137447854e-08, + "loss": 0.9569, + "step": 9033 + }, + { + "epoch": 1.88, + "learning_rate": 5.442842326729347e-08, + "loss": 0.8031, + "step": 9034 + }, + { + "epoch": 1.88, + "learning_rate": 5.4242109898594796e-08, + "loss": 0.9998, + "step": 9035 + }, + { + "epoch": 1.88, + "learning_rate": 5.405611305137148e-08, + "loss": 0.7781, + "step": 9036 + }, + { + "epoch": 1.88, + "learning_rate": 5.3870432745608545e-08, + "loss": 0.9311, + "step": 9037 + }, + { + "epoch": 1.88, + "learning_rate": 5.368506900125736e-08, + "loss": 0.8615, + "step": 9038 + }, + { + "epoch": 1.88, + "learning_rate": 5.3500021838234994e-08, + "loss": 0.8552, + "step": 9039 + }, + { + "epoch": 1.88, + "learning_rate": 5.331529127642521e-08, + "loss": 1.0891, + "step": 9040 + }, + { + "epoch": 1.88, + "learning_rate": 5.3130877335677455e-08, + "loss": 0.9291, + "step": 9041 + }, + { + "epoch": 1.88, + "learning_rate": 5.294678003580622e-08, + "loss": 0.9409, + "step": 9042 + }, + { + "epoch": 1.88, + "learning_rate": 5.276299939659301e-08, + "loss": 0.9913, + "step": 9043 + }, + { + "epoch": 1.88, + "learning_rate": 5.257953543778538e-08, + "loss": 1.0143, + "step": 9044 + }, + { + "epoch": 1.88, + "learning_rate": 5.2396388179096864e-08, + "loss": 0.8186, + "step": 9045 + }, + { + "epoch": 1.88, + "learning_rate": 5.221355764020541e-08, + "loss": 0.9608, + "step": 9046 + }, + { + "epoch": 1.88, + "learning_rate": 5.2031043840756964e-08, + "loss": 0.9215, + "step": 9047 + }, + { + "epoch": 1.88, + "learning_rate": 5.1848846800362835e-08, + "loss": 0.8642, + "step": 9048 + }, + { + "epoch": 1.88, + "learning_rate": 5.16669665385997e-08, + "loss": 0.8216, + "step": 9049 + }, + { + "epoch": 1.88, + "learning_rate": 5.148540307501026e-08, + "loss": 0.824, + "step": 9050 + }, + { + "epoch": 1.88, + "learning_rate": 5.130415642910391e-08, + "loss": 0.8007, + "step": 9051 + }, + { + "epoch": 1.88, + "learning_rate": 5.1123226620355734e-08, + "loss": 0.9334, + "step": 9052 + }, + { + "epoch": 1.88, + "learning_rate": 5.094261366820652e-08, + "loss": 0.8264, + "step": 9053 + }, + { + "epoch": 1.88, + "learning_rate": 5.0762317592063067e-08, + "loss": 0.8115, + "step": 9054 + }, + { + "epoch": 1.88, + "learning_rate": 5.058233841129789e-08, + "loss": 0.9564, + "step": 9055 + }, + { + "epoch": 1.88, + "learning_rate": 5.0402676145249846e-08, + "loss": 0.8889, + "step": 9056 + }, + { + "epoch": 1.88, + "learning_rate": 5.022333081322417e-08, + "loss": 0.9086, + "step": 9057 + }, + { + "epoch": 1.88, + "learning_rate": 5.004430243449076e-08, + "loss": 1.0309, + "step": 9058 + }, + { + "epoch": 1.88, + "learning_rate": 4.9865591028286913e-08, + "loss": 0.7675, + "step": 9059 + }, + { + "epoch": 1.88, + "learning_rate": 4.968719661381427e-08, + "loss": 0.8004, + "step": 9060 + }, + { + "epoch": 1.88, + "learning_rate": 4.950911921024248e-08, + "loss": 1.1211, + "step": 9061 + }, + { + "epoch": 1.88, + "learning_rate": 4.933135883670492e-08, + "loss": 0.7869, + "step": 9062 + }, + { + "epoch": 1.88, + "learning_rate": 4.91539155123023e-08, + "loss": 0.901, + "step": 9063 + }, + { + "epoch": 1.89, + "learning_rate": 4.897678925610072e-08, + "loss": 0.8745, + "step": 9064 + }, + { + "epoch": 1.89, + "learning_rate": 4.8799980087132936e-08, + "loss": 0.7104, + "step": 9065 + }, + { + "epoch": 1.89, + "learning_rate": 4.862348802439642e-08, + "loss": 0.7841, + "step": 9066 + }, + { + "epoch": 1.89, + "learning_rate": 4.8447313086855686e-08, + "loss": 0.895, + "step": 9067 + }, + { + "epoch": 1.89, + "learning_rate": 4.8271455293440237e-08, + "loss": 0.837, + "step": 9068 + }, + { + "epoch": 1.89, + "learning_rate": 4.809591466304664e-08, + "loss": 1.1527, + "step": 9069 + }, + { + "epoch": 1.89, + "learning_rate": 4.792069121453646e-08, + "loss": 1.0853, + "step": 9070 + }, + { + "epoch": 1.89, + "learning_rate": 4.774578496673698e-08, + "loss": 0.8859, + "step": 9071 + }, + { + "epoch": 1.89, + "learning_rate": 4.757119593844217e-08, + "loss": 0.7974, + "step": 9072 + }, + { + "epoch": 1.89, + "learning_rate": 4.739692414841201e-08, + "loss": 0.8891, + "step": 9073 + }, + { + "epoch": 1.89, + "learning_rate": 4.722296961537154e-08, + "loss": 0.8108, + "step": 9074 + }, + { + "epoch": 1.89, + "learning_rate": 4.704933235801245e-08, + "loss": 0.9053, + "step": 9075 + }, + { + "epoch": 1.89, + "learning_rate": 4.687601239499151e-08, + "loss": 0.9003, + "step": 9076 + }, + { + "epoch": 1.89, + "learning_rate": 4.67030097449328e-08, + "loss": 0.8375, + "step": 9077 + }, + { + "epoch": 1.89, + "learning_rate": 4.653032442642446e-08, + "loss": 1.1031, + "step": 9078 + }, + { + "epoch": 1.89, + "learning_rate": 4.635795645802232e-08, + "loss": 0.8382, + "step": 9079 + }, + { + "epoch": 1.89, + "learning_rate": 4.618590585824656e-08, + "loss": 0.8688, + "step": 9080 + }, + { + "epoch": 1.89, + "learning_rate": 4.601417264558505e-08, + "loss": 0.9199, + "step": 9081 + }, + { + "epoch": 1.89, + "learning_rate": 4.584275683848971e-08, + "loss": 1.1365, + "step": 9082 + }, + { + "epoch": 1.89, + "learning_rate": 4.567165845537913e-08, + "loss": 1.1024, + "step": 9083 + }, + { + "epoch": 1.89, + "learning_rate": 4.550087751463794e-08, + "loss": 0.7422, + "step": 9084 + }, + { + "epoch": 1.89, + "learning_rate": 4.533041403461713e-08, + "loss": 0.9149, + "step": 9085 + }, + { + "epoch": 1.89, + "learning_rate": 4.5160268033631716e-08, + "loss": 0.8164, + "step": 9086 + }, + { + "epoch": 1.89, + "learning_rate": 4.49904395299654e-08, + "loss": 1.0618, + "step": 9087 + }, + { + "epoch": 1.89, + "learning_rate": 4.482092854186526e-08, + "loss": 1.0036, + "step": 9088 + }, + { + "epoch": 1.89, + "learning_rate": 4.465173508754539e-08, + "loss": 0.9272, + "step": 9089 + }, + { + "epoch": 1.89, + "learning_rate": 4.4482859185185576e-08, + "loss": 0.9098, + "step": 9090 + }, + { + "epoch": 1.89, + "learning_rate": 4.4314300852931975e-08, + "loss": 1.1384, + "step": 9091 + }, + { + "epoch": 1.89, + "learning_rate": 4.414606010889577e-08, + "loss": 0.8682, + "step": 9092 + }, + { + "epoch": 1.89, + "learning_rate": 4.397813697115449e-08, + "loss": 0.8418, + "step": 9093 + }, + { + "epoch": 1.89, + "learning_rate": 4.3810531457751044e-08, + "loss": 0.9708, + "step": 9094 + }, + { + "epoch": 1.89, + "learning_rate": 4.3643243586695356e-08, + "loss": 0.9308, + "step": 9095 + }, + { + "epoch": 1.89, + "learning_rate": 4.347627337596205e-08, + "loss": 1.1293, + "step": 9096 + }, + { + "epoch": 1.89, + "learning_rate": 4.330962084349277e-08, + "loss": 1.1552, + "step": 9097 + }, + { + "epoch": 1.89, + "learning_rate": 4.314328600719286e-08, + "loss": 0.7941, + "step": 9098 + }, + { + "epoch": 1.89, + "learning_rate": 4.297726888493636e-08, + "loss": 0.9275, + "step": 9099 + }, + { + "epoch": 1.89, + "learning_rate": 4.2811569494561334e-08, + "loss": 1.0203, + "step": 9100 + }, + { + "epoch": 1.89, + "learning_rate": 4.2646187853872196e-08, + "loss": 0.7914, + "step": 9101 + }, + { + "epoch": 1.89, + "learning_rate": 4.248112398063875e-08, + "loss": 0.8359, + "step": 9102 + }, + { + "epoch": 1.89, + "learning_rate": 4.231637789259779e-08, + "loss": 0.7969, + "step": 9103 + }, + { + "epoch": 1.89, + "learning_rate": 4.2151949607450836e-08, + "loss": 0.7382, + "step": 9104 + }, + { + "epoch": 1.89, + "learning_rate": 4.198783914286608e-08, + "loss": 0.8708, + "step": 9105 + }, + { + "epoch": 1.89, + "learning_rate": 4.182404651647676e-08, + "loss": 0.7673, + "step": 9106 + }, + { + "epoch": 1.89, + "learning_rate": 4.166057174588245e-08, + "loss": 1.0287, + "step": 9107 + }, + { + "epoch": 1.89, + "learning_rate": 4.149741484864877e-08, + "loss": 0.8401, + "step": 9108 + }, + { + "epoch": 1.89, + "learning_rate": 4.1334575842307045e-08, + "loss": 1.0616, + "step": 9109 + }, + { + "epoch": 1.89, + "learning_rate": 4.117205474435326e-08, + "loss": 0.9841, + "step": 9110 + }, + { + "epoch": 1.89, + "learning_rate": 4.100985157225145e-08, + "loss": 0.8877, + "step": 9111 + }, + { + "epoch": 1.9, + "learning_rate": 4.084796634343002e-08, + "loss": 0.8901, + "step": 9112 + }, + { + "epoch": 1.9, + "learning_rate": 4.068639907528371e-08, + "loss": 0.8253, + "step": 9113 + }, + { + "epoch": 1.9, + "learning_rate": 4.0525149785171966e-08, + "loss": 1.0443, + "step": 9114 + }, + { + "epoch": 1.9, + "learning_rate": 4.036421849042227e-08, + "loss": 0.9038, + "step": 9115 + }, + { + "epoch": 1.9, + "learning_rate": 4.020360520832578e-08, + "loss": 0.7869, + "step": 9116 + }, + { + "epoch": 1.9, + "learning_rate": 4.004330995614103e-08, + "loss": 0.9789, + "step": 9117 + }, + { + "epoch": 1.9, + "learning_rate": 3.988333275109091e-08, + "loss": 0.8931, + "step": 9118 + }, + { + "epoch": 1.9, + "learning_rate": 3.9723673610365666e-08, + "loss": 0.7156, + "step": 9119 + }, + { + "epoch": 1.9, + "learning_rate": 3.9564332551120574e-08, + "loss": 0.6838, + "step": 9120 + }, + { + "epoch": 1.9, + "learning_rate": 3.940530959047695e-08, + "loss": 1.0805, + "step": 9121 + }, + { + "epoch": 1.9, + "learning_rate": 3.9246604745520774e-08, + "loss": 0.8642, + "step": 9122 + }, + { + "epoch": 1.9, + "learning_rate": 3.908821803330642e-08, + "loss": 1.1389, + "step": 9123 + }, + { + "epoch": 1.9, + "learning_rate": 3.893014947085127e-08, + "loss": 0.9444, + "step": 9124 + }, + { + "epoch": 1.9, + "learning_rate": 3.877239907514074e-08, + "loss": 0.9135, + "step": 9125 + }, + { + "epoch": 1.9, + "learning_rate": 3.8614966863123934e-08, + "loss": 0.889, + "step": 9126 + }, + { + "epoch": 1.9, + "learning_rate": 3.845785285171799e-08, + "loss": 0.7592, + "step": 9127 + }, + { + "epoch": 1.9, + "learning_rate": 3.83010570578044e-08, + "loss": 0.907, + "step": 9128 + }, + { + "epoch": 1.9, + "learning_rate": 3.814457949823102e-08, + "loss": 0.872, + "step": 9129 + }, + { + "epoch": 1.9, + "learning_rate": 3.798842018981108e-08, + "loss": 0.8124, + "step": 9130 + }, + { + "epoch": 1.9, + "learning_rate": 3.783257914932414e-08, + "loss": 0.7714, + "step": 9131 + }, + { + "epoch": 1.9, + "learning_rate": 3.7677056393515154e-08, + "loss": 0.7628, + "step": 9132 + }, + { + "epoch": 1.9, + "learning_rate": 3.752185193909508e-08, + "loss": 0.8174, + "step": 9133 + }, + { + "epoch": 1.9, + "learning_rate": 3.7366965802740236e-08, + "loss": 0.8325, + "step": 9134 + }, + { + "epoch": 1.9, + "learning_rate": 3.721239800109399e-08, + "loss": 0.9285, + "step": 9135 + }, + { + "epoch": 1.9, + "learning_rate": 3.705814855076406e-08, + "loss": 0.8824, + "step": 9136 + }, + { + "epoch": 1.9, + "learning_rate": 3.690421746832484e-08, + "loss": 0.8579, + "step": 9137 + }, + { + "epoch": 1.9, + "learning_rate": 3.675060477031544e-08, + "loss": 0.7457, + "step": 9138 + }, + { + "epoch": 1.9, + "learning_rate": 3.659731047324266e-08, + "loss": 0.9032, + "step": 9139 + }, + { + "epoch": 1.9, + "learning_rate": 3.644433459357699e-08, + "loss": 1.1028, + "step": 9140 + }, + { + "epoch": 1.9, + "learning_rate": 3.629167714775661e-08, + "loss": 0.7792, + "step": 9141 + }, + { + "epoch": 1.9, + "learning_rate": 3.61393381521834e-08, + "loss": 1.1013, + "step": 9142 + }, + { + "epoch": 1.9, + "learning_rate": 3.598731762322727e-08, + "loss": 0.6506, + "step": 9143 + }, + { + "epoch": 1.9, + "learning_rate": 3.5835615577222146e-08, + "loss": 1.093, + "step": 9144 + }, + { + "epoch": 1.9, + "learning_rate": 3.568423203046867e-08, + "loss": 0.8001, + "step": 9145 + }, + { + "epoch": 1.9, + "learning_rate": 3.5533166999232815e-08, + "loss": 1.0611, + "step": 9146 + }, + { + "epoch": 1.9, + "learning_rate": 3.538242049974694e-08, + "loss": 1.0791, + "step": 9147 + }, + { + "epoch": 1.9, + "learning_rate": 3.523199254820808e-08, + "loss": 0.8449, + "step": 9148 + }, + { + "epoch": 1.9, + "learning_rate": 3.5081883160780315e-08, + "loss": 0.8439, + "step": 9149 + }, + { + "epoch": 1.9, + "learning_rate": 3.49320923535924e-08, + "loss": 0.8167, + "step": 9150 + }, + { + "epoch": 1.9, + "learning_rate": 3.478262014273981e-08, + "loss": 0.9283, + "step": 9151 + }, + { + "epoch": 1.9, + "learning_rate": 3.463346654428301e-08, + "loss": 0.9576, + "step": 9152 + }, + { + "epoch": 1.9, + "learning_rate": 3.4484631574248527e-08, + "loss": 1.1238, + "step": 9153 + }, + { + "epoch": 1.9, + "learning_rate": 3.4336115248628566e-08, + "loss": 1.0214, + "step": 9154 + }, + { + "epoch": 1.9, + "learning_rate": 3.418791758338202e-08, + "loss": 1.0576, + "step": 9155 + }, + { + "epoch": 1.9, + "learning_rate": 3.40400385944315e-08, + "loss": 0.8565, + "step": 9156 + }, + { + "epoch": 1.9, + "learning_rate": 3.389247829766762e-08, + "loss": 0.8673, + "step": 9157 + }, + { + "epoch": 1.9, + "learning_rate": 3.374523670894503e-08, + "loss": 1.0911, + "step": 9158 + }, + { + "epoch": 1.9, + "learning_rate": 3.35983138440854e-08, + "loss": 0.9061, + "step": 9159 + }, + { + "epoch": 1.91, + "learning_rate": 3.345170971887479e-08, + "loss": 0.9734, + "step": 9160 + }, + { + "epoch": 1.91, + "learning_rate": 3.330542434906658e-08, + "loss": 0.8997, + "step": 9161 + }, + { + "epoch": 1.91, + "learning_rate": 3.315945775037854e-08, + "loss": 0.9204, + "step": 9162 + }, + { + "epoch": 1.91, + "learning_rate": 3.3013809938495455e-08, + "loss": 0.6734, + "step": 9163 + }, + { + "epoch": 1.91, + "learning_rate": 3.28684809290668e-08, + "loss": 1.211, + "step": 9164 + }, + { + "epoch": 1.91, + "learning_rate": 3.272347073770776e-08, + "loss": 0.9972, + "step": 9165 + }, + { + "epoch": 1.91, + "learning_rate": 3.25787793800002e-08, + "loss": 0.874, + "step": 9166 + }, + { + "epoch": 1.91, + "learning_rate": 3.243440687149135e-08, + "loss": 0.8233, + "step": 9167 + }, + { + "epoch": 1.91, + "learning_rate": 3.229035322769314e-08, + "loss": 0.937, + "step": 9168 + }, + { + "epoch": 1.91, + "learning_rate": 3.214661846408518e-08, + "loss": 1.1607, + "step": 9169 + }, + { + "epoch": 1.91, + "learning_rate": 3.2003202596111113e-08, + "loss": 0.7561, + "step": 9170 + }, + { + "epoch": 1.91, + "learning_rate": 3.186010563918129e-08, + "loss": 0.7525, + "step": 9171 + }, + { + "epoch": 1.91, + "learning_rate": 3.1717327608671074e-08, + "loss": 0.9677, + "step": 9172 + }, + { + "epoch": 1.91, + "learning_rate": 3.15748685199222e-08, + "loss": 0.9215, + "step": 9173 + }, + { + "epoch": 1.91, + "learning_rate": 3.143272838824174e-08, + "loss": 0.9015, + "step": 9174 + }, + { + "epoch": 1.91, + "learning_rate": 3.1290907228903155e-08, + "loss": 0.8, + "step": 9175 + }, + { + "epoch": 1.91, + "learning_rate": 3.1149405057144585e-08, + "loss": 0.937, + "step": 9176 + }, + { + "epoch": 1.91, + "learning_rate": 3.100822188817054e-08, + "loss": 0.8152, + "step": 9177 + }, + { + "epoch": 1.91, + "learning_rate": 3.0867357737151213e-08, + "loss": 0.9334, + "step": 9178 + }, + { + "epoch": 1.91, + "learning_rate": 3.0726812619222835e-08, + "loss": 0.8239, + "step": 9179 + }, + { + "epoch": 1.91, + "learning_rate": 3.0586586549485984e-08, + "loss": 0.875, + "step": 9180 + }, + { + "epoch": 1.91, + "learning_rate": 3.044667954300862e-08, + "loss": 0.8406, + "step": 9181 + }, + { + "epoch": 1.91, + "learning_rate": 3.03070916148237e-08, + "loss": 0.9834, + "step": 9182 + }, + { + "epoch": 1.91, + "learning_rate": 3.016782277993024e-08, + "loss": 0.945, + "step": 9183 + }, + { + "epoch": 1.91, + "learning_rate": 3.002887305329194e-08, + "loss": 0.867, + "step": 9184 + }, + { + "epoch": 1.91, + "learning_rate": 2.9890242449839177e-08, + "loss": 0.7194, + "step": 9185 + }, + { + "epoch": 1.91, + "learning_rate": 2.9751930984468045e-08, + "loss": 0.9219, + "step": 9186 + }, + { + "epoch": 1.91, + "learning_rate": 2.961393867204032e-08, + "loss": 1.0214, + "step": 9187 + }, + { + "epoch": 1.91, + "learning_rate": 2.9476265527382473e-08, + "loss": 0.7348, + "step": 9188 + }, + { + "epoch": 1.91, + "learning_rate": 2.9338911565288008e-08, + "loss": 1.0428, + "step": 9189 + }, + { + "epoch": 1.91, + "learning_rate": 2.920187680051545e-08, + "loss": 0.6975, + "step": 9190 + }, + { + "epoch": 1.91, + "learning_rate": 2.9065161247789352e-08, + "loss": 0.8457, + "step": 9191 + }, + { + "epoch": 1.91, + "learning_rate": 2.8928764921799634e-08, + "loss": 1.0746, + "step": 9192 + }, + { + "epoch": 1.91, + "learning_rate": 2.8792687837202238e-08, + "loss": 0.8656, + "step": 9193 + }, + { + "epoch": 1.91, + "learning_rate": 2.8656930008618465e-08, + "loss": 0.799, + "step": 9194 + }, + { + "epoch": 1.91, + "learning_rate": 2.8521491450635652e-08, + "loss": 0.836, + "step": 9195 + }, + { + "epoch": 1.91, + "learning_rate": 2.838637217780615e-08, + "loss": 0.7688, + "step": 9196 + }, + { + "epoch": 1.91, + "learning_rate": 2.8251572204649357e-08, + "loss": 0.9457, + "step": 9197 + }, + { + "epoch": 1.91, + "learning_rate": 2.8117091545648677e-08, + "loss": 0.7907, + "step": 9198 + }, + { + "epoch": 1.91, + "learning_rate": 2.798293021525489e-08, + "loss": 0.7855, + "step": 9199 + }, + { + "epoch": 1.91, + "learning_rate": 2.784908822788279e-08, + "loss": 0.8825, + "step": 9200 + }, + { + "epoch": 1.91, + "learning_rate": 2.7715565597914216e-08, + "loss": 0.8302, + "step": 9201 + }, + { + "epoch": 1.91, + "learning_rate": 2.758236233969602e-08, + "loss": 1.0683, + "step": 9202 + }, + { + "epoch": 1.91, + "learning_rate": 2.7449478467540756e-08, + "loss": 0.7978, + "step": 9203 + }, + { + "epoch": 1.91, + "learning_rate": 2.7316913995726998e-08, + "loss": 0.9211, + "step": 9204 + }, + { + "epoch": 1.91, + "learning_rate": 2.718466893849869e-08, + "loss": 1.0069, + "step": 9205 + }, + { + "epoch": 1.91, + "learning_rate": 2.70527433100658e-08, + "loss": 0.9548, + "step": 9206 + }, + { + "epoch": 1.91, + "learning_rate": 2.6921137124603312e-08, + "loss": 0.7591, + "step": 9207 + }, + { + "epoch": 1.92, + "learning_rate": 2.678985039625226e-08, + "loss": 1.0447, + "step": 9208 + }, + { + "epoch": 1.92, + "learning_rate": 2.6658883139120016e-08, + "loss": 0.8782, + "step": 9209 + }, + { + "epoch": 1.92, + "learning_rate": 2.652823536727833e-08, + "loss": 0.8903, + "step": 9210 + }, + { + "epoch": 1.92, + "learning_rate": 2.639790709476564e-08, + "loss": 0.8285, + "step": 9211 + }, + { + "epoch": 1.92, + "learning_rate": 2.626789833558574e-08, + "loss": 0.806, + "step": 9212 + }, + { + "epoch": 1.92, + "learning_rate": 2.613820910370779e-08, + "loss": 0.856, + "step": 9213 + }, + { + "epoch": 1.92, + "learning_rate": 2.600883941306731e-08, + "loss": 0.8743, + "step": 9214 + }, + { + "epoch": 1.92, + "learning_rate": 2.5879789277564848e-08, + "loss": 0.7857, + "step": 9215 + }, + { + "epoch": 1.92, + "learning_rate": 2.575105871106631e-08, + "loss": 0.9136, + "step": 9216 + }, + { + "epoch": 1.92, + "learning_rate": 2.5622647727404635e-08, + "loss": 1.0114, + "step": 9217 + }, + { + "epoch": 1.92, + "learning_rate": 2.5494556340377118e-08, + "loss": 0.9757, + "step": 9218 + }, + { + "epoch": 1.92, + "learning_rate": 2.5366784563747412e-08, + "loss": 1.1459, + "step": 9219 + }, + { + "epoch": 1.92, + "learning_rate": 2.523933241124421e-08, + "loss": 0.9005, + "step": 9220 + }, + { + "epoch": 1.92, + "learning_rate": 2.5112199896562882e-08, + "loss": 0.8591, + "step": 9221 + }, + { + "epoch": 1.92, + "learning_rate": 2.4985387033362837e-08, + "loss": 0.8262, + "step": 9222 + }, + { + "epoch": 1.92, + "learning_rate": 2.485889383527118e-08, + "loss": 0.9362, + "step": 9223 + }, + { + "epoch": 1.92, + "learning_rate": 2.4732720315878697e-08, + "loss": 0.7278, + "step": 9224 + }, + { + "epoch": 1.92, + "learning_rate": 2.460686648874322e-08, + "loss": 1.1004, + "step": 9225 + }, + { + "epoch": 1.92, + "learning_rate": 2.4481332367387586e-08, + "loss": 0.6996, + "step": 9226 + }, + { + "epoch": 1.92, + "learning_rate": 2.4356117965300684e-08, + "loss": 0.7828, + "step": 9227 + }, + { + "epoch": 1.92, + "learning_rate": 2.423122329593641e-08, + "loss": 0.8114, + "step": 9228 + }, + { + "epoch": 1.92, + "learning_rate": 2.4106648372714702e-08, + "loss": 0.8243, + "step": 9229 + }, + { + "epoch": 1.92, + "learning_rate": 2.3982393209021515e-08, + "loss": 0.84, + "step": 9230 + }, + { + "epoch": 1.92, + "learning_rate": 2.3858457818207835e-08, + "loss": 1.0527, + "step": 9231 + }, + { + "epoch": 1.92, + "learning_rate": 2.3734842213590015e-08, + "loss": 1.0589, + "step": 9232 + }, + { + "epoch": 1.92, + "learning_rate": 2.3611546408451755e-08, + "loss": 0.876, + "step": 9233 + }, + { + "epoch": 1.92, + "learning_rate": 2.34885704160398e-08, + "loss": 0.8528, + "step": 9234 + }, + { + "epoch": 1.92, + "learning_rate": 2.336591424956891e-08, + "loss": 0.8423, + "step": 9235 + }, + { + "epoch": 1.92, + "learning_rate": 2.3243577922217875e-08, + "loss": 0.76, + "step": 9236 + }, + { + "epoch": 1.92, + "learning_rate": 2.3121561447132178e-08, + "loss": 1.0827, + "step": 9237 + }, + { + "epoch": 1.92, + "learning_rate": 2.2999864837422337e-08, + "loss": 0.9212, + "step": 9238 + }, + { + "epoch": 1.92, + "learning_rate": 2.2878488106164553e-08, + "loss": 0.9119, + "step": 9239 + }, + { + "epoch": 1.92, + "learning_rate": 2.2757431266400397e-08, + "loss": 0.9427, + "step": 9240 + }, + { + "epoch": 1.92, + "learning_rate": 2.2636694331138463e-08, + "loss": 0.8125, + "step": 9241 + }, + { + "epoch": 1.92, + "learning_rate": 2.2516277313350707e-08, + "loss": 0.8494, + "step": 9242 + }, + { + "epoch": 1.92, + "learning_rate": 2.2396180225976782e-08, + "loss": 0.8656, + "step": 9243 + }, + { + "epoch": 1.92, + "learning_rate": 2.2276403081920692e-08, + "loss": 0.8973, + "step": 9244 + }, + { + "epoch": 1.92, + "learning_rate": 2.2156945894052815e-08, + "loss": 0.8768, + "step": 9245 + }, + { + "epoch": 1.92, + "learning_rate": 2.2037808675208215e-08, + "loss": 0.7881, + "step": 9246 + }, + { + "epoch": 1.92, + "learning_rate": 2.1918991438188983e-08, + "loss": 0.9221, + "step": 9247 + }, + { + "epoch": 1.92, + "learning_rate": 2.1800494195761244e-08, + "loss": 1.085, + "step": 9248 + }, + { + "epoch": 1.92, + "learning_rate": 2.1682316960658146e-08, + "loss": 0.7009, + "step": 9249 + }, + { + "epoch": 1.92, + "learning_rate": 2.1564459745577526e-08, + "loss": 0.8416, + "step": 9250 + }, + { + "epoch": 1.92, + "learning_rate": 2.1446922563183258e-08, + "loss": 0.9352, + "step": 9251 + }, + { + "epoch": 1.92, + "learning_rate": 2.132970542610424e-08, + "loss": 0.7569, + "step": 9252 + }, + { + "epoch": 1.92, + "learning_rate": 2.1212808346936064e-08, + "loss": 0.7366, + "step": 9253 + }, + { + "epoch": 1.92, + "learning_rate": 2.1096231338239013e-08, + "loss": 0.9904, + "step": 9254 + }, + { + "epoch": 1.92, + "learning_rate": 2.0979974412539405e-08, + "loss": 0.9257, + "step": 9255 + }, + { + "epoch": 1.93, + "learning_rate": 2.086403758232891e-08, + "loss": 0.8063, + "step": 9256 + }, + { + "epoch": 1.93, + "learning_rate": 2.07484208600649e-08, + "loss": 1.0207, + "step": 9257 + }, + { + "epoch": 1.93, + "learning_rate": 2.0633124258170765e-08, + "loss": 0.9862, + "step": 9258 + }, + { + "epoch": 1.93, + "learning_rate": 2.0518147789034602e-08, + "loss": 0.775, + "step": 9259 + }, + { + "epoch": 1.93, + "learning_rate": 2.040349146501086e-08, + "loss": 0.8831, + "step": 9260 + }, + { + "epoch": 1.93, + "learning_rate": 2.028915529841935e-08, + "loss": 0.7759, + "step": 9261 + }, + { + "epoch": 1.93, + "learning_rate": 2.0175139301545574e-08, + "loss": 0.8699, + "step": 9262 + }, + { + "epoch": 1.93, + "learning_rate": 2.006144348664041e-08, + "loss": 0.937, + "step": 9263 + }, + { + "epoch": 1.93, + "learning_rate": 1.9948067865920405e-08, + "loss": 0.9638, + "step": 9264 + }, + { + "epoch": 1.93, + "learning_rate": 1.9835012451567825e-08, + "loss": 0.9003, + "step": 9265 + }, + { + "epoch": 1.93, + "learning_rate": 1.9722277255730614e-08, + "loss": 1.0386, + "step": 9266 + }, + { + "epoch": 1.93, + "learning_rate": 1.9609862290522084e-08, + "loss": 0.8489, + "step": 9267 + }, + { + "epoch": 1.93, + "learning_rate": 1.949776756802091e-08, + "loss": 0.8438, + "step": 9268 + }, + { + "epoch": 1.93, + "learning_rate": 1.938599310027245e-08, + "loss": 1.1895, + "step": 9269 + }, + { + "epoch": 1.93, + "learning_rate": 1.9274538899285765e-08, + "loss": 0.8883, + "step": 9270 + }, + { + "epoch": 1.93, + "learning_rate": 1.9163404977037613e-08, + "loss": 0.8033, + "step": 9271 + }, + { + "epoch": 1.93, + "learning_rate": 1.905259134546844e-08, + "loss": 0.7185, + "step": 9272 + }, + { + "epoch": 1.93, + "learning_rate": 1.8942098016486054e-08, + "loss": 0.9923, + "step": 9273 + }, + { + "epoch": 1.93, + "learning_rate": 1.8831925001962293e-08, + "loss": 0.7994, + "step": 9274 + }, + { + "epoch": 1.93, + "learning_rate": 1.8722072313735684e-08, + "loss": 0.8793, + "step": 9275 + }, + { + "epoch": 1.93, + "learning_rate": 1.8612539963609454e-08, + "loss": 0.9501, + "step": 9276 + }, + { + "epoch": 1.93, + "learning_rate": 1.8503327963353188e-08, + "loss": 0.9242, + "step": 9277 + }, + { + "epoch": 1.93, + "learning_rate": 1.839443632470117e-08, + "loss": 0.9017, + "step": 9278 + }, + { + "epoch": 1.93, + "learning_rate": 1.82858650593547e-08, + "loss": 0.9541, + "step": 9279 + }, + { + "epoch": 1.93, + "learning_rate": 1.817761417897912e-08, + "loss": 0.9085, + "step": 9280 + }, + { + "epoch": 1.93, + "learning_rate": 1.8069683695206117e-08, + "loss": 0.7357, + "step": 9281 + }, + { + "epoch": 1.93, + "learning_rate": 1.7962073619632757e-08, + "loss": 0.9553, + "step": 9282 + }, + { + "epoch": 1.93, + "learning_rate": 1.7854783963821784e-08, + "loss": 0.7799, + "step": 9283 + }, + { + "epoch": 1.93, + "learning_rate": 1.7747814739301316e-08, + "loss": 0.762, + "step": 9284 + }, + { + "epoch": 1.93, + "learning_rate": 1.7641165957565485e-08, + "loss": 0.9379, + "step": 9285 + }, + { + "epoch": 1.93, + "learning_rate": 1.7534837630073798e-08, + "loss": 0.8686, + "step": 9286 + }, + { + "epoch": 1.93, + "learning_rate": 1.7428829768250775e-08, + "loss": 0.7248, + "step": 9287 + }, + { + "epoch": 1.93, + "learning_rate": 1.7323142383487312e-08, + "loss": 0.8154, + "step": 9288 + }, + { + "epoch": 1.93, + "learning_rate": 1.7217775487138988e-08, + "loss": 0.6687, + "step": 9289 + }, + { + "epoch": 1.93, + "learning_rate": 1.7112729090528078e-08, + "loss": 1.3784, + "step": 9290 + }, + { + "epoch": 1.93, + "learning_rate": 1.7008003204941557e-08, + "loss": 0.929, + "step": 9291 + }, + { + "epoch": 1.93, + "learning_rate": 1.690359784163242e-08, + "loss": 0.8915, + "step": 9292 + }, + { + "epoch": 1.93, + "learning_rate": 1.6799513011818368e-08, + "loss": 1.1319, + "step": 9293 + }, + { + "epoch": 1.93, + "learning_rate": 1.6695748726684112e-08, + "loss": 1.0473, + "step": 9294 + }, + { + "epoch": 1.93, + "learning_rate": 1.659230499737874e-08, + "loss": 0.8772, + "step": 9295 + }, + { + "epoch": 1.93, + "learning_rate": 1.6489181835017353e-08, + "loss": 0.8931, + "step": 9296 + }, + { + "epoch": 1.93, + "learning_rate": 1.6386379250680427e-08, + "loss": 0.9262, + "step": 9297 + }, + { + "epoch": 1.93, + "learning_rate": 1.6283897255414125e-08, + "loss": 1.0051, + "step": 9298 + }, + { + "epoch": 1.93, + "learning_rate": 1.6181735860229973e-08, + "loss": 1.0091, + "step": 9299 + }, + { + "epoch": 1.93, + "learning_rate": 1.6079895076105522e-08, + "loss": 0.9767, + "step": 9300 + }, + { + "epoch": 1.93, + "learning_rate": 1.5978374913983685e-08, + "loss": 0.8504, + "step": 9301 + }, + { + "epoch": 1.93, + "learning_rate": 1.5877175384772068e-08, + "loss": 0.8838, + "step": 9302 + }, + { + "epoch": 1.93, + "learning_rate": 1.5776296499345312e-08, + "loss": 1.0039, + "step": 9303 + }, + { + "epoch": 1.94, + "learning_rate": 1.5675738268542406e-08, + "loss": 0.7915, + "step": 9304 + }, + { + "epoch": 1.94, + "learning_rate": 1.5575500703168712e-08, + "loss": 1.1115, + "step": 9305 + }, + { + "epoch": 1.94, + "learning_rate": 1.5475583813994277e-08, + "loss": 0.8267, + "step": 9306 + }, + { + "epoch": 1.94, + "learning_rate": 1.5375987611755184e-08, + "loss": 0.7797, + "step": 9307 + }, + { + "epoch": 1.94, + "learning_rate": 1.5276712107153535e-08, + "loss": 0.9741, + "step": 9308 + }, + { + "epoch": 1.94, + "learning_rate": 1.5177757310856132e-08, + "loss": 0.8075, + "step": 9309 + }, + { + "epoch": 1.94, + "learning_rate": 1.5079123233495807e-08, + "loss": 0.9932, + "step": 9310 + }, + { + "epoch": 1.94, + "learning_rate": 1.498080988567041e-08, + "loss": 0.8655, + "step": 9311 + }, + { + "epoch": 1.94, + "learning_rate": 1.4882817277944494e-08, + "loss": 0.7531, + "step": 9312 + }, + { + "epoch": 1.94, + "learning_rate": 1.4785145420846303e-08, + "loss": 0.8019, + "step": 9313 + }, + { + "epoch": 1.94, + "learning_rate": 1.468779432487144e-08, + "loss": 0.9279, + "step": 9314 + }, + { + "epoch": 1.94, + "learning_rate": 1.4590764000480205e-08, + "loss": 1.1701, + "step": 9315 + }, + { + "epoch": 1.94, + "learning_rate": 1.4494054458098261e-08, + "loss": 0.7454, + "step": 9316 + }, + { + "epoch": 1.94, + "learning_rate": 1.4397665708116958e-08, + "loss": 0.7786, + "step": 9317 + }, + { + "epoch": 1.94, + "learning_rate": 1.430159776089368e-08, + "loss": 0.9736, + "step": 9318 + }, + { + "epoch": 1.94, + "learning_rate": 1.4205850626750505e-08, + "loss": 0.9243, + "step": 9319 + }, + { + "epoch": 1.94, + "learning_rate": 1.4110424315975534e-08, + "loss": 1.0202, + "step": 9320 + }, + { + "epoch": 1.94, + "learning_rate": 1.4015318838822566e-08, + "loss": 1.0844, + "step": 9321 + }, + { + "epoch": 1.94, + "learning_rate": 1.3920534205510427e-08, + "loss": 1.0493, + "step": 9322 + }, + { + "epoch": 1.94, + "learning_rate": 1.3826070426223636e-08, + "loss": 0.9519, + "step": 9323 + }, + { + "epoch": 1.94, + "learning_rate": 1.373192751111274e-08, + "loss": 0.9299, + "step": 9324 + }, + { + "epoch": 1.94, + "learning_rate": 1.3638105470293315e-08, + "loss": 0.9274, + "step": 9325 + }, + { + "epoch": 1.94, + "learning_rate": 1.3544604313845966e-08, + "loss": 0.9223, + "step": 9326 + }, + { + "epoch": 1.94, + "learning_rate": 1.3451424051817984e-08, + "loss": 1.1522, + "step": 9327 + }, + { + "epoch": 1.94, + "learning_rate": 1.3358564694221365e-08, + "loss": 0.9063, + "step": 9328 + }, + { + "epoch": 1.94, + "learning_rate": 1.3266026251033791e-08, + "loss": 0.9807, + "step": 9329 + }, + { + "epoch": 1.94, + "learning_rate": 1.3173808732198645e-08, + "loss": 0.8201, + "step": 9330 + }, + { + "epoch": 1.94, + "learning_rate": 1.3081912147624331e-08, + "loss": 0.8861, + "step": 9331 + }, + { + "epoch": 1.94, + "learning_rate": 1.2990336507185618e-08, + "loss": 0.8967, + "step": 9332 + }, + { + "epoch": 1.94, + "learning_rate": 1.2899081820722302e-08, + "loss": 0.9104, + "step": 9333 + }, + { + "epoch": 1.94, + "learning_rate": 1.2808148098039207e-08, + "loss": 0.9634, + "step": 9334 + }, + { + "epoch": 1.94, + "learning_rate": 1.2717535348907516e-08, + "loss": 0.7919, + "step": 9335 + }, + { + "epoch": 1.94, + "learning_rate": 1.2627243583063442e-08, + "loss": 1.0665, + "step": 9336 + }, + { + "epoch": 1.94, + "learning_rate": 1.2537272810208889e-08, + "loss": 0.8108, + "step": 9337 + }, + { + "epoch": 1.94, + "learning_rate": 1.2447623040011458e-08, + "loss": 0.8049, + "step": 9338 + }, + { + "epoch": 1.94, + "learning_rate": 1.2358294282103445e-08, + "loss": 0.789, + "step": 9339 + }, + { + "epoch": 1.94, + "learning_rate": 1.2269286546083502e-08, + "loss": 0.8283, + "step": 9340 + }, + { + "epoch": 1.94, + "learning_rate": 1.2180599841515649e-08, + "loss": 0.9602, + "step": 9341 + }, + { + "epoch": 1.94, + "learning_rate": 1.2092234177928929e-08, + "loss": 1.1291, + "step": 9342 + }, + { + "epoch": 1.94, + "learning_rate": 1.2004189564818747e-08, + "loss": 0.9021, + "step": 9343 + }, + { + "epoch": 1.94, + "learning_rate": 1.19164660116452e-08, + "loss": 0.8367, + "step": 9344 + }, + { + "epoch": 1.94, + "learning_rate": 1.182906352783375e-08, + "loss": 0.8584, + "step": 9345 + }, + { + "epoch": 1.94, + "learning_rate": 1.1741982122776552e-08, + "loss": 1.0024, + "step": 9346 + }, + { + "epoch": 1.94, + "learning_rate": 1.1655221805829786e-08, + "loss": 1.0859, + "step": 9347 + }, + { + "epoch": 1.94, + "learning_rate": 1.1568782586316662e-08, + "loss": 0.8683, + "step": 9348 + }, + { + "epoch": 1.94, + "learning_rate": 1.1482664473524418e-08, + "loss": 1.0169, + "step": 9349 + }, + { + "epoch": 1.94, + "learning_rate": 1.1396867476706318e-08, + "loss": 0.7798, + "step": 9350 + }, + { + "epoch": 1.94, + "learning_rate": 1.131139160508199e-08, + "loss": 0.811, + "step": 9351 + }, + { + "epoch": 1.95, + "learning_rate": 1.1226236867835082e-08, + "loss": 0.8437, + "step": 9352 + }, + { + "epoch": 1.95, + "learning_rate": 1.1141403274115947e-08, + "loss": 0.9147, + "step": 9353 + }, + { + "epoch": 1.95, + "learning_rate": 1.1056890833039623e-08, + "loss": 0.8451, + "step": 9354 + }, + { + "epoch": 1.95, + "learning_rate": 1.097269955368685e-08, + "loss": 0.9183, + "step": 9355 + }, + { + "epoch": 1.95, + "learning_rate": 1.088882944510472e-08, + "loss": 0.8147, + "step": 9356 + }, + { + "epoch": 1.95, + "learning_rate": 1.0805280516304362e-08, + "loss": 1.1017, + "step": 9357 + }, + { + "epoch": 1.95, + "learning_rate": 1.0722052776263259e-08, + "loss": 0.9082, + "step": 9358 + }, + { + "epoch": 1.95, + "learning_rate": 1.0639146233924257e-08, + "loss": 0.9572, + "step": 9359 + }, + { + "epoch": 1.95, + "learning_rate": 1.0556560898195566e-08, + "loss": 0.8201, + "step": 9360 + }, + { + "epoch": 1.95, + "learning_rate": 1.047429677795142e-08, + "loss": 0.6659, + "step": 9361 + }, + { + "epoch": 1.95, + "learning_rate": 1.0392353882030414e-08, + "loss": 1.0687, + "step": 9362 + }, + { + "epoch": 1.95, + "learning_rate": 1.031073221923784e-08, + "loss": 0.6886, + "step": 9363 + }, + { + "epoch": 1.95, + "learning_rate": 1.022943179834368e-08, + "loss": 0.6952, + "step": 9364 + }, + { + "epoch": 1.95, + "learning_rate": 1.0148452628083948e-08, + "loss": 1.0174, + "step": 9365 + }, + { + "epoch": 1.95, + "learning_rate": 1.0067794717159351e-08, + "loss": 0.909, + "step": 9366 + }, + { + "epoch": 1.95, + "learning_rate": 9.987458074236954e-09, + "loss": 0.8347, + "step": 9367 + }, + { + "epoch": 1.95, + "learning_rate": 9.907442707948854e-09, + "loss": 0.9306, + "step": 9368 + }, + { + "epoch": 1.95, + "learning_rate": 9.827748626892508e-09, + "loss": 0.8206, + "step": 9369 + }, + { + "epoch": 1.95, + "learning_rate": 9.748375839631396e-09, + "loss": 0.9232, + "step": 9370 + }, + { + "epoch": 1.95, + "learning_rate": 9.669324354693698e-09, + "loss": 1.0115, + "step": 9371 + }, + { + "epoch": 1.95, + "learning_rate": 9.590594180573952e-09, + "loss": 0.8248, + "step": 9372 + }, + { + "epoch": 1.95, + "learning_rate": 9.512185325731392e-09, + "loss": 1.0107, + "step": 9373 + }, + { + "epoch": 1.95, + "learning_rate": 9.434097798590946e-09, + "loss": 0.7921, + "step": 9374 + }, + { + "epoch": 1.95, + "learning_rate": 9.356331607543567e-09, + "loss": 1.0732, + "step": 9375 + }, + { + "epoch": 1.95, + "learning_rate": 9.278886760944904e-09, + "loss": 0.8632, + "step": 9376 + }, + { + "epoch": 1.95, + "learning_rate": 9.201763267116303e-09, + "loss": 0.8779, + "step": 9377 + }, + { + "epoch": 1.95, + "learning_rate": 9.1249611343448e-09, + "loss": 1.0984, + "step": 9378 + }, + { + "epoch": 1.95, + "learning_rate": 9.048480370882794e-09, + "loss": 0.7815, + "step": 9379 + }, + { + "epoch": 1.95, + "learning_rate": 8.972320984948379e-09, + "loss": 0.9998, + "step": 9380 + }, + { + "epoch": 1.95, + "learning_rate": 8.89648298472434e-09, + "loss": 0.9949, + "step": 9381 + }, + { + "epoch": 1.95, + "learning_rate": 8.820966378360162e-09, + "loss": 0.8523, + "step": 9382 + }, + { + "epoch": 1.95, + "learning_rate": 8.745771173969352e-09, + "loss": 0.6665, + "step": 9383 + }, + { + "epoch": 1.95, + "learning_rate": 8.670897379632114e-09, + "loss": 0.8034, + "step": 9384 + }, + { + "epoch": 1.95, + "learning_rate": 8.596345003393347e-09, + "loss": 0.8166, + "step": 9385 + }, + { + "epoch": 1.95, + "learning_rate": 8.522114053263975e-09, + "loss": 0.9982, + "step": 9386 + }, + { + "epoch": 1.95, + "learning_rate": 8.44820453721995e-09, + "loss": 0.8456, + "step": 9387 + }, + { + "epoch": 1.95, + "learning_rate": 8.374616463202922e-09, + "loss": 0.8333, + "step": 9388 + }, + { + "epoch": 1.95, + "learning_rate": 8.301349839119899e-09, + "loss": 0.8086, + "step": 9389 + }, + { + "epoch": 1.95, + "learning_rate": 8.228404672843248e-09, + "loss": 1.0816, + "step": 9390 + }, + { + "epoch": 1.95, + "learning_rate": 8.155780972211036e-09, + "loss": 0.9094, + "step": 9391 + }, + { + "epoch": 1.95, + "learning_rate": 8.083478745027018e-09, + "loss": 0.7904, + "step": 9392 + }, + { + "epoch": 1.95, + "learning_rate": 8.011497999059313e-09, + "loss": 0.8643, + "step": 9393 + }, + { + "epoch": 1.95, + "learning_rate": 7.939838742042737e-09, + "loss": 0.9973, + "step": 9394 + }, + { + "epoch": 1.95, + "learning_rate": 7.868500981676795e-09, + "loss": 0.8734, + "step": 9395 + }, + { + "epoch": 1.95, + "learning_rate": 7.797484725627357e-09, + "loss": 1.0016, + "step": 9396 + }, + { + "epoch": 1.95, + "learning_rate": 7.726789981524318e-09, + "loss": 1.1504, + "step": 9397 + }, + { + "epoch": 1.95, + "learning_rate": 7.656416756964269e-09, + "loss": 0.8022, + "step": 9398 + }, + { + "epoch": 1.95, + "learning_rate": 7.586365059508493e-09, + "loss": 0.7792, + "step": 9399 + }, + { + "epoch": 1.96, + "learning_rate": 7.516634896684638e-09, + "loss": 0.9681, + "step": 9400 + }, + { + "epoch": 1.96, + "learning_rate": 7.447226275984709e-09, + "loss": 0.699, + "step": 9401 + }, + { + "epoch": 1.96, + "learning_rate": 7.378139204866741e-09, + "loss": 1.1605, + "step": 9402 + }, + { + "epoch": 1.96, + "learning_rate": 7.309373690754129e-09, + "loss": 1.1659, + "step": 9403 + }, + { + "epoch": 1.96, + "learning_rate": 7.240929741035629e-09, + "loss": 0.7989, + "step": 9404 + }, + { + "epoch": 1.96, + "learning_rate": 7.172807363066025e-09, + "loss": 0.9939, + "step": 9405 + }, + { + "epoch": 1.96, + "learning_rate": 7.105006564164462e-09, + "loss": 1.0761, + "step": 9406 + }, + { + "epoch": 1.96, + "learning_rate": 7.037527351616446e-09, + "loss": 0.7914, + "step": 9407 + }, + { + "epoch": 1.96, + "learning_rate": 6.97036973267251e-09, + "loss": 0.9987, + "step": 9408 + }, + { + "epoch": 1.96, + "learning_rate": 6.903533714548549e-09, + "loss": 1.0748, + "step": 9409 + }, + { + "epoch": 1.96, + "learning_rate": 6.837019304426484e-09, + "loss": 0.8787, + "step": 9410 + }, + { + "epoch": 1.96, + "learning_rate": 6.770826509453265e-09, + "loss": 0.7561, + "step": 9411 + }, + { + "epoch": 1.96, + "learning_rate": 6.704955336741203e-09, + "loss": 0.8824, + "step": 9412 + }, + { + "epoch": 1.96, + "learning_rate": 6.63940579336797e-09, + "loss": 0.8156, + "step": 9413 + }, + { + "epoch": 1.96, + "learning_rate": 6.574177886376598e-09, + "loss": 0.9266, + "step": 9414 + }, + { + "epoch": 1.96, + "learning_rate": 6.509271622776813e-09, + "loss": 0.8748, + "step": 9415 + }, + { + "epoch": 1.96, + "learning_rate": 6.444687009541706e-09, + "loss": 0.9234, + "step": 9416 + }, + { + "epoch": 1.96, + "learning_rate": 6.380424053611722e-09, + "loss": 0.8841, + "step": 9417 + }, + { + "epoch": 1.96, + "learning_rate": 6.31648276189134e-09, + "loss": 0.9828, + "step": 9418 + }, + { + "epoch": 1.96, + "learning_rate": 6.252863141251397e-09, + "loss": 0.9662, + "step": 9419 + }, + { + "epoch": 1.96, + "learning_rate": 6.189565198527425e-09, + "loss": 1.2253, + "step": 9420 + }, + { + "epoch": 1.96, + "learning_rate": 6.126588940521316e-09, + "loss": 0.8046, + "step": 9421 + }, + { + "epoch": 1.96, + "learning_rate": 6.063934373999325e-09, + "loss": 1.2102, + "step": 9422 + }, + { + "epoch": 1.96, + "learning_rate": 6.001601505694398e-09, + "loss": 0.9985, + "step": 9423 + }, + { + "epoch": 1.96, + "learning_rate": 5.9395903423035136e-09, + "loss": 1.0472, + "step": 9424 + }, + { + "epoch": 1.96, + "learning_rate": 5.877900890489673e-09, + "loss": 0.9826, + "step": 9425 + }, + { + "epoch": 1.96, + "learning_rate": 5.816533156881909e-09, + "loss": 0.9709, + "step": 9426 + }, + { + "epoch": 1.96, + "learning_rate": 5.755487148074279e-09, + "loss": 0.9924, + "step": 9427 + }, + { + "epoch": 1.96, + "learning_rate": 5.694762870625536e-09, + "loss": 0.9969, + "step": 9428 + }, + { + "epoch": 1.96, + "learning_rate": 5.634360331061128e-09, + "loss": 0.973, + "step": 9429 + }, + { + "epoch": 1.96, + "learning_rate": 5.574279535870863e-09, + "loss": 0.7249, + "step": 9430 + }, + { + "epoch": 1.96, + "learning_rate": 5.514520491510577e-09, + "loss": 0.8367, + "step": 9431 + }, + { + "epoch": 1.96, + "learning_rate": 5.455083204401468e-09, + "loss": 0.9923, + "step": 9432 + }, + { + "epoch": 1.96, + "learning_rate": 5.395967680930092e-09, + "loss": 0.9364, + "step": 9433 + }, + { + "epoch": 1.96, + "learning_rate": 5.337173927448036e-09, + "loss": 0.6087, + "step": 9434 + }, + { + "epoch": 1.96, + "learning_rate": 5.278701950272913e-09, + "loss": 1.1157, + "step": 9435 + }, + { + "epoch": 1.96, + "learning_rate": 5.22055175568803e-09, + "loss": 0.9118, + "step": 9436 + }, + { + "epoch": 1.96, + "learning_rate": 5.162723349940723e-09, + "loss": 1.0963, + "step": 9437 + }, + { + "epoch": 1.96, + "learning_rate": 5.1052167392453555e-09, + "loss": 0.722, + "step": 9438 + }, + { + "epoch": 1.96, + "learning_rate": 5.048031929780317e-09, + "loss": 1.0757, + "step": 9439 + }, + { + "epoch": 1.96, + "learning_rate": 4.991168927691026e-09, + "loss": 1.169, + "step": 9440 + }, + { + "epoch": 1.96, + "learning_rate": 4.934627739086595e-09, + "loss": 1.0266, + "step": 9441 + }, + { + "epoch": 1.96, + "learning_rate": 4.8784083700428305e-09, + "loss": 0.8524, + "step": 9442 + }, + { + "epoch": 1.96, + "learning_rate": 4.822510826600235e-09, + "loss": 0.9018, + "step": 9443 + }, + { + "epoch": 1.96, + "learning_rate": 4.766935114765336e-09, + "loss": 0.912, + "step": 9444 + }, + { + "epoch": 1.96, + "learning_rate": 4.711681240509358e-09, + "loss": 0.8718, + "step": 9445 + }, + { + "epoch": 1.96, + "learning_rate": 4.656749209769551e-09, + "loss": 0.8661, + "step": 9446 + }, + { + "epoch": 1.96, + "learning_rate": 4.602139028448193e-09, + "loss": 0.8341, + "step": 9447 + }, + { + "epoch": 1.97, + "learning_rate": 4.547850702413259e-09, + "loss": 0.8357, + "step": 9448 + }, + { + "epoch": 1.97, + "learning_rate": 4.493884237498081e-09, + "loss": 1.2808, + "step": 9449 + }, + { + "epoch": 1.97, + "learning_rate": 4.440239639501353e-09, + "loss": 0.9535, + "step": 9450 + }, + { + "epoch": 1.97, + "learning_rate": 4.386916914187133e-09, + "loss": 0.9033, + "step": 9451 + }, + { + "epoch": 1.97, + "learning_rate": 4.333916067284838e-09, + "loss": 0.8978, + "step": 9452 + }, + { + "epoch": 1.97, + "learning_rate": 4.281237104489577e-09, + "loss": 0.8539, + "step": 9453 + }, + { + "epoch": 1.97, + "learning_rate": 4.22888003146149e-09, + "loss": 0.753, + "step": 9454 + }, + { + "epoch": 1.97, + "learning_rate": 4.176844853826411e-09, + "loss": 1.0204, + "step": 9455 + }, + { + "epoch": 1.97, + "learning_rate": 4.125131577175867e-09, + "loss": 0.8878, + "step": 9456 + }, + { + "epoch": 1.97, + "learning_rate": 4.073740207066079e-09, + "loss": 0.855, + "step": 9457 + }, + { + "epoch": 1.97, + "learning_rate": 4.022670749018964e-09, + "loss": 0.6972, + "step": 9458 + }, + { + "epoch": 1.97, + "learning_rate": 3.971923208522133e-09, + "loss": 0.8768, + "step": 9459 + }, + { + "epoch": 1.97, + "learning_rate": 3.921497591028555e-09, + "loss": 1.011, + "step": 9460 + }, + { + "epoch": 1.97, + "learning_rate": 3.871393901955899e-09, + "loss": 1.0701, + "step": 9461 + }, + { + "epoch": 1.97, + "learning_rate": 3.8216121466885246e-09, + "loss": 0.8742, + "step": 9462 + }, + { + "epoch": 1.97, + "learning_rate": 3.7721523305748184e-09, + "loss": 0.9479, + "step": 9463 + }, + { + "epoch": 1.97, + "learning_rate": 3.723014458929863e-09, + "loss": 0.8709, + "step": 9464 + }, + { + "epoch": 1.97, + "learning_rate": 3.6741985370327692e-09, + "loss": 1.1206, + "step": 9465 + }, + { + "epoch": 1.97, + "learning_rate": 3.6257045701296733e-09, + "loss": 0.9244, + "step": 9466 + }, + { + "epoch": 1.97, + "learning_rate": 3.5775325634304077e-09, + "loss": 0.699, + "step": 9467 + }, + { + "epoch": 1.97, + "learning_rate": 3.5296825221118324e-09, + "loss": 0.9128, + "step": 9468 + }, + { + "epoch": 1.97, + "learning_rate": 3.482154451314834e-09, + "loss": 1.1884, + "step": 9469 + }, + { + "epoch": 1.97, + "learning_rate": 3.434948356146328e-09, + "loss": 1.0082, + "step": 9470 + }, + { + "epoch": 1.97, + "learning_rate": 3.3880642416792563e-09, + "loss": 0.9822, + "step": 9471 + }, + { + "epoch": 1.97, + "learning_rate": 3.3415021129505894e-09, + "loss": 0.8013, + "step": 9472 + } + ], + "logging_steps": 1, + "max_steps": 9616, + "num_train_epochs": 2, + "save_steps": 256, + "total_flos": 1.101418869958692e+19, + "trial_name": null, + "trial_params": null +}