{ "best_metric": null, "best_model_checkpoint": null, "epoch": 5.925570228091236, "eval_steps": 209, "global_step": 4998, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 2e-05, "loss": 2.1426, "step": 1 }, { "epoch": 0.0, "eval_loss": 2.071432113647461, "eval_runtime": 279.6718, "eval_samples_per_second": 0.737, "eval_steps_per_second": 0.737, "step": 1 }, { "epoch": 0.0, "learning_rate": 4e-05, "loss": 2.4033, "step": 2 }, { "epoch": 0.0, "learning_rate": 6e-05, "loss": 2.1893, "step": 3 }, { "epoch": 0.0, "learning_rate": 8e-05, "loss": 2.3226, "step": 4 }, { "epoch": 0.01, "learning_rate": 0.0001, "loss": 2.2485, "step": 5 }, { "epoch": 0.01, "learning_rate": 0.00012, "loss": 1.9704, "step": 6 }, { "epoch": 0.01, "learning_rate": 0.00014, "loss": 1.6929, "step": 7 }, { "epoch": 0.01, "learning_rate": 0.00016, "loss": 2.2957, "step": 8 }, { "epoch": 0.01, "learning_rate": 0.00018, "loss": 1.9907, "step": 9 }, { "epoch": 0.01, "learning_rate": 0.0002, "loss": 2.1295, "step": 10 }, { "epoch": 0.01, "learning_rate": 0.00019999999287109068, "loss": 2.2249, "step": 11 }, { "epoch": 0.01, "learning_rate": 0.00019999997148436365, "loss": 2.1733, "step": 12 }, { "epoch": 0.02, "learning_rate": 0.000199999935839822, "loss": 2.1404, "step": 13 }, { "epoch": 0.02, "learning_rate": 0.00019999988593747084, "loss": 2.0236, "step": 14 }, { "epoch": 0.02, "learning_rate": 0.00019999982177731722, "loss": 1.9639, "step": 15 }, { "epoch": 0.02, "learning_rate": 0.00019999974335937034, "loss": 1.692, "step": 16 }, { "epoch": 0.02, "learning_rate": 0.00019999965068364137, "loss": 2.3609, "step": 17 }, { "epoch": 0.02, "learning_rate": 0.00019999954375014348, "loss": 2.3553, "step": 18 }, { "epoch": 0.02, "learning_rate": 0.00019999942255889198, "loss": 1.5733, "step": 19 }, { "epoch": 0.02, "learning_rate": 0.00019999928710990412, "loss": 1.7505, "step": 20 }, { "epoch": 0.03, "learning_rate": 0.00019999913740319922, "loss": 2.3068, "step": 21 }, { "epoch": 0.03, "learning_rate": 0.00019999897343879862, "loss": 1.8371, "step": 22 }, { "epoch": 0.03, "learning_rate": 0.0001999987952167257, "loss": 1.9852, "step": 23 }, { "epoch": 0.03, "learning_rate": 0.00019999860273700585, "loss": 1.9625, "step": 24 }, { "epoch": 0.03, "learning_rate": 0.00019999839599966655, "loss": 2.1089, "step": 25 }, { "epoch": 0.03, "learning_rate": 0.00019999817500473724, "loss": 2.1086, "step": 26 }, { "epoch": 0.03, "learning_rate": 0.00019999793975224945, "loss": 2.0284, "step": 27 }, { "epoch": 0.03, "learning_rate": 0.00019999769024223673, "loss": 2.3641, "step": 28 }, { "epoch": 0.03, "learning_rate": 0.00019999742647473464, "loss": 1.963, "step": 29 }, { "epoch": 0.04, "learning_rate": 0.00019999714844978078, "loss": 2.0635, "step": 30 }, { "epoch": 0.04, "learning_rate": 0.0001999968561674148, "loss": 1.9304, "step": 31 }, { "epoch": 0.04, "learning_rate": 0.00019999654962767839, "loss": 1.4124, "step": 32 }, { "epoch": 0.04, "learning_rate": 0.00019999622883061518, "loss": 2.1444, "step": 33 }, { "epoch": 0.04, "learning_rate": 0.00019999589377627102, "loss": 1.6477, "step": 34 }, { "epoch": 0.04, "learning_rate": 0.0001999955444646936, "loss": 2.2601, "step": 35 }, { "epoch": 0.04, "learning_rate": 0.00019999518089593282, "loss": 1.6256, "step": 36 }, { "epoch": 0.04, "learning_rate": 0.0001999948030700404, "loss": 1.9155, "step": 37 }, { "epoch": 0.05, "learning_rate": 0.00019999441098707025, "loss": 2.1408, "step": 38 }, { "epoch": 0.05, "learning_rate": 0.00019999400464707832, "loss": 2.104, "step": 39 }, { "epoch": 0.05, "learning_rate": 0.0001999935840501225, "loss": 1.9841, "step": 40 }, { "epoch": 0.05, "learning_rate": 0.00019999314919626272, "loss": 1.5924, "step": 41 }, { "epoch": 0.05, "learning_rate": 0.00019999270008556108, "loss": 1.9956, "step": 42 }, { "epoch": 0.05, "learning_rate": 0.00019999223671808154, "loss": 1.4673, "step": 43 }, { "epoch": 0.05, "learning_rate": 0.00019999175909389018, "loss": 2.1595, "step": 44 }, { "epoch": 0.05, "learning_rate": 0.00019999126721305513, "loss": 1.8439, "step": 45 }, { "epoch": 0.06, "learning_rate": 0.00019999076107564648, "loss": 1.9961, "step": 46 }, { "epoch": 0.06, "learning_rate": 0.00019999024068173638, "loss": 2.1504, "step": 47 }, { "epoch": 0.06, "learning_rate": 0.00019998970603139912, "loss": 2.2907, "step": 48 }, { "epoch": 0.06, "learning_rate": 0.0001999891571247108, "loss": 1.5709, "step": 49 }, { "epoch": 0.06, "learning_rate": 0.0001999885939617498, "loss": 2.4504, "step": 50 }, { "epoch": 0.06, "learning_rate": 0.00019998801654259632, "loss": 2.3787, "step": 51 }, { "epoch": 0.06, "learning_rate": 0.0001999874248673328, "loss": 2.0434, "step": 52 }, { "epoch": 0.06, "learning_rate": 0.00019998681893604347, "loss": 2.1671, "step": 53 }, { "epoch": 0.06, "learning_rate": 0.0001999861987488148, "loss": 1.7432, "step": 54 }, { "epoch": 0.07, "learning_rate": 0.00019998556430573521, "loss": 1.7737, "step": 55 }, { "epoch": 0.07, "learning_rate": 0.00019998491560689513, "loss": 2.0122, "step": 56 }, { "epoch": 0.07, "learning_rate": 0.0001999842526523871, "loss": 1.7545, "step": 57 }, { "epoch": 0.07, "learning_rate": 0.00019998357544230558, "loss": 2.201, "step": 58 }, { "epoch": 0.07, "learning_rate": 0.00019998288397674716, "loss": 2.0396, "step": 59 }, { "epoch": 0.07, "learning_rate": 0.0001999821782558104, "loss": 1.9275, "step": 60 }, { "epoch": 0.07, "learning_rate": 0.00019998145827959598, "loss": 1.7797, "step": 61 }, { "epoch": 0.07, "learning_rate": 0.0001999807240482065, "loss": 2.1463, "step": 62 }, { "epoch": 0.08, "learning_rate": 0.00019997997556174665, "loss": 1.935, "step": 63 }, { "epoch": 0.08, "learning_rate": 0.0001999792128203232, "loss": 2.1182, "step": 64 }, { "epoch": 0.08, "learning_rate": 0.0001999784358240448, "loss": 2.2297, "step": 65 }, { "epoch": 0.08, "learning_rate": 0.00019997764457302234, "loss": 2.1052, "step": 66 }, { "epoch": 0.08, "learning_rate": 0.0001999768390673686, "loss": 2.0777, "step": 67 }, { "epoch": 0.08, "learning_rate": 0.00019997601930719835, "loss": 2.1419, "step": 68 }, { "epoch": 0.08, "learning_rate": 0.0001999751852926286, "loss": 2.2586, "step": 69 }, { "epoch": 0.08, "learning_rate": 0.00019997433702377817, "loss": 1.9089, "step": 70 }, { "epoch": 0.09, "learning_rate": 0.00019997347450076801, "loss": 2.0587, "step": 71 }, { "epoch": 0.09, "learning_rate": 0.00019997259772372116, "loss": 2.4143, "step": 72 }, { "epoch": 0.09, "learning_rate": 0.00019997170669276256, "loss": 1.947, "step": 73 }, { "epoch": 0.09, "learning_rate": 0.00019997080140801932, "loss": 2.008, "step": 74 }, { "epoch": 0.09, "learning_rate": 0.00019996988186962041, "loss": 2.4912, "step": 75 }, { "epoch": 0.09, "learning_rate": 0.00019996894807769707, "loss": 2.0279, "step": 76 }, { "epoch": 0.09, "learning_rate": 0.00019996800003238232, "loss": 1.9914, "step": 77 }, { "epoch": 0.09, "learning_rate": 0.0001999670377338114, "loss": 1.9091, "step": 78 }, { "epoch": 0.09, "learning_rate": 0.00019996606118212148, "loss": 1.8038, "step": 79 }, { "epoch": 0.1, "learning_rate": 0.00019996507037745183, "loss": 2.3573, "step": 80 }, { "epoch": 0.1, "learning_rate": 0.00019996406531994364, "loss": 2.3204, "step": 81 }, { "epoch": 0.1, "learning_rate": 0.0001999630460097403, "loss": 2.1619, "step": 82 }, { "epoch": 0.1, "learning_rate": 0.0001999620124469871, "loss": 1.9977, "step": 83 }, { "epoch": 0.1, "learning_rate": 0.00019996096463183142, "loss": 2.195, "step": 84 }, { "epoch": 0.1, "learning_rate": 0.00019995990256442263, "loss": 1.9909, "step": 85 }, { "epoch": 0.1, "learning_rate": 0.00019995882624491217, "loss": 2.2001, "step": 86 }, { "epoch": 0.1, "learning_rate": 0.00019995773567345354, "loss": 1.5795, "step": 87 }, { "epoch": 0.11, "learning_rate": 0.00019995663085020212, "loss": 2.174, "step": 88 }, { "epoch": 0.11, "learning_rate": 0.00019995551177531557, "loss": 1.9605, "step": 89 }, { "epoch": 0.11, "learning_rate": 0.00019995437844895334, "loss": 2.1768, "step": 90 }, { "epoch": 0.11, "learning_rate": 0.0001999532308712771, "loss": 1.6906, "step": 91 }, { "epoch": 0.11, "learning_rate": 0.00019995206904245037, "loss": 2.1029, "step": 92 }, { "epoch": 0.11, "learning_rate": 0.00019995089296263893, "loss": 2.0652, "step": 93 }, { "epoch": 0.11, "learning_rate": 0.00019994970263201035, "loss": 2.1733, "step": 94 }, { "epoch": 0.11, "learning_rate": 0.0001999484980507344, "loss": 1.9413, "step": 95 }, { "epoch": 0.12, "learning_rate": 0.0001999472792189828, "loss": 1.9538, "step": 96 }, { "epoch": 0.12, "learning_rate": 0.00019994604613692935, "loss": 2.4158, "step": 97 }, { "epoch": 0.12, "learning_rate": 0.00019994479880474988, "loss": 1.8964, "step": 98 }, { "epoch": 0.12, "learning_rate": 0.0001999435372226222, "loss": 2.3135, "step": 99 }, { "epoch": 0.12, "learning_rate": 0.0001999422613907262, "loss": 2.127, "step": 100 }, { "epoch": 0.12, "learning_rate": 0.00019994097130924374, "loss": 1.9954, "step": 101 }, { "epoch": 0.12, "learning_rate": 0.00019993966697835883, "loss": 2.1363, "step": 102 }, { "epoch": 0.12, "learning_rate": 0.00019993834839825738, "loss": 1.7779, "step": 103 }, { "epoch": 0.12, "learning_rate": 0.00019993701556912742, "loss": 2.0923, "step": 104 }, { "epoch": 0.13, "learning_rate": 0.00019993566849115898, "loss": 1.9183, "step": 105 }, { "epoch": 0.13, "learning_rate": 0.00019993430716454413, "loss": 1.7894, "step": 106 }, { "epoch": 0.13, "learning_rate": 0.00019993293158947694, "loss": 2.0094, "step": 107 }, { "epoch": 0.13, "learning_rate": 0.0001999315417661536, "loss": 2.1469, "step": 108 }, { "epoch": 0.13, "learning_rate": 0.0001999301376947722, "loss": 1.6924, "step": 109 }, { "epoch": 0.13, "learning_rate": 0.0001999287193755329, "loss": 2.1794, "step": 110 }, { "epoch": 0.13, "learning_rate": 0.000199927286808638, "loss": 2.1338, "step": 111 }, { "epoch": 0.13, "learning_rate": 0.00019992583999429178, "loss": 1.9988, "step": 112 }, { "epoch": 0.14, "learning_rate": 0.0001999243789327004, "loss": 2.0735, "step": 113 }, { "epoch": 0.14, "learning_rate": 0.0001999229036240723, "loss": 2.0521, "step": 114 }, { "epoch": 0.14, "learning_rate": 0.00019992141406861776, "loss": 1.9441, "step": 115 }, { "epoch": 0.14, "learning_rate": 0.00019991991026654918, "loss": 2.1244, "step": 116 }, { "epoch": 0.14, "learning_rate": 0.0001999183922180809, "loss": 1.7937, "step": 117 }, { "epoch": 0.14, "learning_rate": 0.0001999168599234295, "loss": 2.2603, "step": 118 }, { "epoch": 0.14, "learning_rate": 0.00019991531338281332, "loss": 2.1846, "step": 119 }, { "epoch": 0.14, "learning_rate": 0.00019991375259645293, "loss": 2.3241, "step": 120 }, { "epoch": 0.15, "learning_rate": 0.00019991217756457085, "loss": 2.0926, "step": 121 }, { "epoch": 0.15, "learning_rate": 0.00019991058828739165, "loss": 2.0092, "step": 122 }, { "epoch": 0.15, "learning_rate": 0.00019990898476514193, "loss": 1.8076, "step": 123 }, { "epoch": 0.15, "learning_rate": 0.00019990736699805029, "loss": 2.0369, "step": 124 }, { "epoch": 0.15, "learning_rate": 0.00019990573498634742, "loss": 2.0488, "step": 125 }, { "epoch": 0.15, "learning_rate": 0.000199904088730266, "loss": 2.1534, "step": 126 }, { "epoch": 0.15, "learning_rate": 0.00019990242823004074, "loss": 2.1406, "step": 127 }, { "epoch": 0.15, "learning_rate": 0.00019990075348590839, "loss": 1.9379, "step": 128 }, { "epoch": 0.15, "learning_rate": 0.00019989906449810775, "loss": 1.9781, "step": 129 }, { "epoch": 0.16, "learning_rate": 0.00019989736126687963, "loss": 1.973, "step": 130 }, { "epoch": 0.16, "learning_rate": 0.00019989564379246683, "loss": 1.6825, "step": 131 }, { "epoch": 0.16, "learning_rate": 0.00019989391207511428, "loss": 2.0843, "step": 132 }, { "epoch": 0.16, "learning_rate": 0.00019989216611506887, "loss": 1.8547, "step": 133 }, { "epoch": 0.16, "learning_rate": 0.00019989040591257952, "loss": 1.7626, "step": 134 }, { "epoch": 0.16, "learning_rate": 0.0001998886314678972, "loss": 2.0531, "step": 135 }, { "epoch": 0.16, "learning_rate": 0.00019988684278127497, "loss": 2.0031, "step": 136 }, { "epoch": 0.16, "learning_rate": 0.00019988503985296773, "loss": 1.9342, "step": 137 }, { "epoch": 0.17, "learning_rate": 0.00019988322268323268, "loss": 2.3297, "step": 138 }, { "epoch": 0.17, "learning_rate": 0.00019988139127232878, "loss": 2.3401, "step": 139 }, { "epoch": 0.17, "learning_rate": 0.00019987954562051725, "loss": 1.8983, "step": 140 }, { "epoch": 0.17, "learning_rate": 0.0001998776857280612, "loss": 2.0621, "step": 141 }, { "epoch": 0.17, "learning_rate": 0.00019987581159522578, "loss": 2.0574, "step": 142 }, { "epoch": 0.17, "learning_rate": 0.00019987392322227824, "loss": 1.9516, "step": 143 }, { "epoch": 0.17, "learning_rate": 0.00019987202060948783, "loss": 2.1402, "step": 144 }, { "epoch": 0.17, "learning_rate": 0.00019987010375712577, "loss": 1.8903, "step": 145 }, { "epoch": 0.18, "learning_rate": 0.00019986817266546539, "loss": 1.8248, "step": 146 }, { "epoch": 0.18, "learning_rate": 0.00019986622733478204, "loss": 1.9877, "step": 147 }, { "epoch": 0.18, "learning_rate": 0.00019986426776535306, "loss": 1.6272, "step": 148 }, { "epoch": 0.18, "learning_rate": 0.00019986229395745785, "loss": 1.8605, "step": 149 }, { "epoch": 0.18, "learning_rate": 0.00019986030591137783, "loss": 1.6848, "step": 150 }, { "epoch": 0.18, "learning_rate": 0.00019985830362739647, "loss": 2.1922, "step": 151 }, { "epoch": 0.18, "learning_rate": 0.0001998562871057992, "loss": 2.0238, "step": 152 }, { "epoch": 0.18, "learning_rate": 0.0001998542563468736, "loss": 2.2246, "step": 153 }, { "epoch": 0.18, "learning_rate": 0.00019985221135090914, "loss": 1.9438, "step": 154 }, { "epoch": 0.19, "learning_rate": 0.00019985015211819744, "loss": 2.2136, "step": 155 }, { "epoch": 0.19, "learning_rate": 0.0001998480786490321, "loss": 2.4563, "step": 156 }, { "epoch": 0.19, "learning_rate": 0.00019984599094370874, "loss": 2.2138, "step": 157 }, { "epoch": 0.19, "learning_rate": 0.00019984388900252503, "loss": 2.2679, "step": 158 }, { "epoch": 0.19, "learning_rate": 0.00019984177282578064, "loss": 1.9537, "step": 159 }, { "epoch": 0.19, "learning_rate": 0.0001998396424137773, "loss": 2.0803, "step": 160 }, { "epoch": 0.19, "learning_rate": 0.0001998374977668188, "loss": 2.0282, "step": 161 }, { "epoch": 0.19, "learning_rate": 0.00019983533888521087, "loss": 2.0157, "step": 162 }, { "epoch": 0.2, "learning_rate": 0.0001998331657692613, "loss": 1.7837, "step": 163 }, { "epoch": 0.2, "learning_rate": 0.00019983097841928, "loss": 2.1556, "step": 164 }, { "epoch": 0.2, "learning_rate": 0.00019982877683557879, "loss": 2.1447, "step": 165 }, { "epoch": 0.2, "learning_rate": 0.00019982656101847162, "loss": 2.4139, "step": 166 }, { "epoch": 0.2, "learning_rate": 0.0001998243309682743, "loss": 1.6788, "step": 167 }, { "epoch": 0.2, "learning_rate": 0.00019982208668530493, "loss": 1.9008, "step": 168 }, { "epoch": 0.2, "learning_rate": 0.0001998198281698834, "loss": 2.173, "step": 169 }, { "epoch": 0.2, "learning_rate": 0.00019981755542233177, "loss": 2.1837, "step": 170 }, { "epoch": 0.21, "learning_rate": 0.00019981526844297404, "loss": 2.0639, "step": 171 }, { "epoch": 0.21, "learning_rate": 0.00019981296723213632, "loss": 2.3864, "step": 172 }, { "epoch": 0.21, "learning_rate": 0.00019981065179014673, "loss": 1.923, "step": 173 }, { "epoch": 0.21, "learning_rate": 0.00019980832211733535, "loss": 1.9192, "step": 174 }, { "epoch": 0.21, "learning_rate": 0.00019980597821403438, "loss": 2.0335, "step": 175 }, { "epoch": 0.21, "learning_rate": 0.000199803620080578, "loss": 1.8172, "step": 176 }, { "epoch": 0.21, "learning_rate": 0.0001998012477173024, "loss": 2.0294, "step": 177 }, { "epoch": 0.21, "learning_rate": 0.00019979886112454586, "loss": 2.2889, "step": 178 }, { "epoch": 0.21, "learning_rate": 0.00019979646030264867, "loss": 1.8498, "step": 179 }, { "epoch": 0.22, "learning_rate": 0.0001997940452519531, "loss": 2.0797, "step": 180 }, { "epoch": 0.22, "learning_rate": 0.0001997916159728035, "loss": 2.2356, "step": 181 }, { "epoch": 0.22, "learning_rate": 0.0001997891724655462, "loss": 2.1187, "step": 182 }, { "epoch": 0.22, "learning_rate": 0.00019978671473052964, "loss": 1.9301, "step": 183 }, { "epoch": 0.22, "learning_rate": 0.00019978424276810423, "loss": 1.8582, "step": 184 }, { "epoch": 0.22, "learning_rate": 0.0001997817565786224, "loss": 2.144, "step": 185 }, { "epoch": 0.22, "learning_rate": 0.00019977925616243862, "loss": 2.0595, "step": 186 }, { "epoch": 0.22, "learning_rate": 0.00019977674151990945, "loss": 1.9104, "step": 187 }, { "epoch": 0.23, "learning_rate": 0.00019977421265139332, "loss": 1.9727, "step": 188 }, { "epoch": 0.23, "learning_rate": 0.00019977166955725088, "loss": 1.8727, "step": 189 }, { "epoch": 0.23, "learning_rate": 0.0001997691122378447, "loss": 2.0611, "step": 190 }, { "epoch": 0.23, "learning_rate": 0.0001997665406935394, "loss": 2.0745, "step": 191 }, { "epoch": 0.23, "learning_rate": 0.0001997639549247016, "loss": 1.9974, "step": 192 }, { "epoch": 0.23, "learning_rate": 0.00019976135493169996, "loss": 1.9856, "step": 193 }, { "epoch": 0.23, "learning_rate": 0.00019975874071490526, "loss": 1.778, "step": 194 }, { "epoch": 0.23, "learning_rate": 0.00019975611227469016, "loss": 1.8347, "step": 195 }, { "epoch": 0.24, "learning_rate": 0.0001997534696114294, "loss": 1.5555, "step": 196 }, { "epoch": 0.24, "learning_rate": 0.00019975081272549989, "loss": 1.5625, "step": 197 }, { "epoch": 0.24, "learning_rate": 0.00019974814161728032, "loss": 1.9997, "step": 198 }, { "epoch": 0.24, "learning_rate": 0.00019974545628715157, "loss": 1.9523, "step": 199 }, { "epoch": 0.24, "learning_rate": 0.00019974275673549654, "loss": 2.1557, "step": 200 }, { "epoch": 0.24, "learning_rate": 0.00019974004296270006, "loss": 1.8306, "step": 201 }, { "epoch": 0.24, "learning_rate": 0.00019973731496914914, "loss": 2.0051, "step": 202 }, { "epoch": 0.24, "learning_rate": 0.00019973457275523264, "loss": 2.201, "step": 203 }, { "epoch": 0.24, "learning_rate": 0.0001997318163213416, "loss": 2.2446, "step": 204 }, { "epoch": 0.25, "learning_rate": 0.00019972904566786903, "loss": 2.1172, "step": 205 }, { "epoch": 0.25, "learning_rate": 0.00019972626079520995, "loss": 1.9849, "step": 206 }, { "epoch": 0.25, "learning_rate": 0.00019972346170376142, "loss": 1.9774, "step": 207 }, { "epoch": 0.25, "learning_rate": 0.0001997206483939225, "loss": 1.7625, "step": 208 }, { "epoch": 0.25, "learning_rate": 0.00019971782086609436, "loss": 2.2346, "step": 209 }, { "epoch": 0.25, "eval_loss": 2.00066876411438, "eval_runtime": 282.7648, "eval_samples_per_second": 0.729, "eval_steps_per_second": 0.729, "step": 209 }, { "epoch": 0.25, "learning_rate": 0.00019971497912068013, "loss": 2.4185, "step": 210 }, { "epoch": 0.25, "learning_rate": 0.00019971212315808497, "loss": 1.946, "step": 211 }, { "epoch": 0.25, "learning_rate": 0.00019970925297871605, "loss": 2.0049, "step": 212 }, { "epoch": 0.26, "learning_rate": 0.00019970636858298267, "loss": 1.9545, "step": 213 }, { "epoch": 0.26, "learning_rate": 0.00019970346997129598, "loss": 1.9636, "step": 214 }, { "epoch": 0.26, "learning_rate": 0.00019970055714406938, "loss": 1.9068, "step": 215 }, { "epoch": 0.26, "learning_rate": 0.00019969763010171807, "loss": 1.5749, "step": 216 }, { "epoch": 0.26, "learning_rate": 0.00019969468884465942, "loss": 1.7676, "step": 217 }, { "epoch": 0.26, "learning_rate": 0.0001996917333733128, "loss": 2.0329, "step": 218 }, { "epoch": 0.26, "learning_rate": 0.0001996887636880996, "loss": 1.9307, "step": 219 }, { "epoch": 0.26, "learning_rate": 0.00019968577978944323, "loss": 2.134, "step": 220 }, { "epoch": 0.27, "learning_rate": 0.00019968278167776908, "loss": 2.0911, "step": 221 }, { "epoch": 0.27, "learning_rate": 0.00019967976935350467, "loss": 2.5057, "step": 222 }, { "epoch": 0.27, "learning_rate": 0.0001996767428170795, "loss": 1.9267, "step": 223 }, { "epoch": 0.27, "learning_rate": 0.00019967370206892503, "loss": 2.3569, "step": 224 }, { "epoch": 0.27, "learning_rate": 0.00019967064710947488, "loss": 1.992, "step": 225 }, { "epoch": 0.27, "learning_rate": 0.00019966757793916454, "loss": 2.01, "step": 226 }, { "epoch": 0.27, "learning_rate": 0.00019966449455843165, "loss": 1.8037, "step": 227 }, { "epoch": 0.27, "learning_rate": 0.00019966139696771587, "loss": 2.2498, "step": 228 }, { "epoch": 0.27, "learning_rate": 0.00019965828516745876, "loss": 1.6563, "step": 229 }, { "epoch": 0.28, "learning_rate": 0.0001996551591581041, "loss": 1.979, "step": 230 }, { "epoch": 0.28, "learning_rate": 0.0001996520189400975, "loss": 2.1553, "step": 231 }, { "epoch": 0.28, "learning_rate": 0.0001996488645138867, "loss": 1.8743, "step": 232 }, { "epoch": 0.28, "learning_rate": 0.00019964569587992148, "loss": 2.1907, "step": 233 }, { "epoch": 0.28, "learning_rate": 0.00019964251303865362, "loss": 2.0644, "step": 234 }, { "epoch": 0.28, "learning_rate": 0.00019963931599053692, "loss": 2.1721, "step": 235 }, { "epoch": 0.28, "learning_rate": 0.0001996361047360272, "loss": 2.2267, "step": 236 }, { "epoch": 0.28, "learning_rate": 0.0001996328792755823, "loss": 1.9445, "step": 237 }, { "epoch": 0.29, "learning_rate": 0.00019962963960966213, "loss": 2.2003, "step": 238 }, { "epoch": 0.29, "learning_rate": 0.0001996263857387286, "loss": 2.3114, "step": 239 }, { "epoch": 0.29, "learning_rate": 0.0001996231176632456, "loss": 1.8553, "step": 240 }, { "epoch": 0.29, "learning_rate": 0.00019961983538367914, "loss": 2.1349, "step": 241 }, { "epoch": 0.29, "learning_rate": 0.00019961653890049715, "loss": 1.8784, "step": 242 }, { "epoch": 0.29, "learning_rate": 0.0001996132282141697, "loss": 2.0118, "step": 243 }, { "epoch": 0.29, "learning_rate": 0.00019960990332516874, "loss": 1.9938, "step": 244 }, { "epoch": 0.29, "learning_rate": 0.00019960656423396834, "loss": 2.2582, "step": 245 }, { "epoch": 0.3, "learning_rate": 0.00019960321094104465, "loss": 2.1807, "step": 246 }, { "epoch": 0.3, "learning_rate": 0.00019959984344687578, "loss": 1.9084, "step": 247 }, { "epoch": 0.3, "learning_rate": 0.00019959646175194174, "loss": 2.2879, "step": 248 }, { "epoch": 0.3, "learning_rate": 0.0001995930658567248, "loss": 1.942, "step": 249 }, { "epoch": 0.3, "learning_rate": 0.00019958965576170908, "loss": 2.1313, "step": 250 }, { "epoch": 0.3, "learning_rate": 0.00019958623146738088, "loss": 2.3202, "step": 251 }, { "epoch": 0.3, "learning_rate": 0.0001995827929742283, "loss": 1.7832, "step": 252 }, { "epoch": 0.3, "learning_rate": 0.00019957934028274162, "loss": 1.7103, "step": 253 }, { "epoch": 0.3, "learning_rate": 0.00019957587339341321, "loss": 1.9912, "step": 254 }, { "epoch": 0.31, "learning_rate": 0.0001995723923067373, "loss": 1.6686, "step": 255 }, { "epoch": 0.31, "learning_rate": 0.00019956889702321023, "loss": 1.966, "step": 256 }, { "epoch": 0.31, "learning_rate": 0.00019956538754333034, "loss": 2.2287, "step": 257 }, { "epoch": 0.31, "learning_rate": 0.00019956186386759804, "loss": 1.4866, "step": 258 }, { "epoch": 0.31, "learning_rate": 0.0001995583259965157, "loss": 1.9599, "step": 259 }, { "epoch": 0.31, "learning_rate": 0.00019955477393058773, "loss": 1.9273, "step": 260 }, { "epoch": 0.31, "learning_rate": 0.0001995512076703206, "loss": 1.847, "step": 261 }, { "epoch": 0.31, "learning_rate": 0.00019954762721622279, "loss": 2.0535, "step": 262 }, { "epoch": 0.32, "learning_rate": 0.0001995440325688048, "loss": 2.4403, "step": 263 }, { "epoch": 0.32, "learning_rate": 0.00019954042372857908, "loss": 1.8712, "step": 264 }, { "epoch": 0.32, "learning_rate": 0.00019953680069606026, "loss": 2.1837, "step": 265 }, { "epoch": 0.32, "learning_rate": 0.00019953316347176488, "loss": 2.0398, "step": 266 }, { "epoch": 0.32, "learning_rate": 0.0001995295120562115, "loss": 2.1135, "step": 267 }, { "epoch": 0.32, "learning_rate": 0.00019952584644992075, "loss": 2.0358, "step": 268 }, { "epoch": 0.32, "learning_rate": 0.00019952216665341526, "loss": 2.3282, "step": 269 }, { "epoch": 0.32, "learning_rate": 0.0001995184726672197, "loss": 1.9741, "step": 270 }, { "epoch": 0.33, "learning_rate": 0.00019951476449186074, "loss": 1.7523, "step": 271 }, { "epoch": 0.33, "learning_rate": 0.00019951104212786712, "loss": 2.1509, "step": 272 }, { "epoch": 0.33, "learning_rate": 0.0001995073055757695, "loss": 2.0865, "step": 273 }, { "epoch": 0.33, "learning_rate": 0.00019950355483610067, "loss": 1.8972, "step": 274 }, { "epoch": 0.33, "learning_rate": 0.00019949978990939542, "loss": 2.4693, "step": 275 }, { "epoch": 0.33, "learning_rate": 0.0001994960107961905, "loss": 1.9307, "step": 276 }, { "epoch": 0.33, "learning_rate": 0.0001994922174970248, "loss": 2.0097, "step": 277 }, { "epoch": 0.33, "learning_rate": 0.0001994884100124391, "loss": 1.6561, "step": 278 }, { "epoch": 0.33, "learning_rate": 0.0001994845883429763, "loss": 2.3069, "step": 279 }, { "epoch": 0.34, "learning_rate": 0.00019948075248918124, "loss": 2.0134, "step": 280 }, { "epoch": 0.34, "learning_rate": 0.00019947690245160091, "loss": 2.1061, "step": 281 }, { "epoch": 0.34, "learning_rate": 0.00019947303823078416, "loss": 2.0855, "step": 282 }, { "epoch": 0.34, "learning_rate": 0.00019946915982728197, "loss": 1.5672, "step": 283 }, { "epoch": 0.34, "learning_rate": 0.0001994652672416473, "loss": 1.7289, "step": 284 }, { "epoch": 0.34, "learning_rate": 0.00019946136047443522, "loss": 1.9013, "step": 285 }, { "epoch": 0.34, "learning_rate": 0.00019945743952620268, "loss": 2.3105, "step": 286 }, { "epoch": 0.34, "learning_rate": 0.00019945350439750872, "loss": 2.341, "step": 287 }, { "epoch": 0.35, "learning_rate": 0.00019944955508891443, "loss": 1.88, "step": 288 }, { "epoch": 0.35, "learning_rate": 0.0001994455916009829, "loss": 1.913, "step": 289 }, { "epoch": 0.35, "learning_rate": 0.00019944161393427922, "loss": 1.9513, "step": 290 }, { "epoch": 0.35, "learning_rate": 0.00019943762208937053, "loss": 2.3331, "step": 291 }, { "epoch": 0.35, "learning_rate": 0.00019943361606682597, "loss": 2.3024, "step": 292 }, { "epoch": 0.35, "learning_rate": 0.00019942959586721672, "loss": 2.2222, "step": 293 }, { "epoch": 0.35, "learning_rate": 0.00019942556149111598, "loss": 2.1003, "step": 294 }, { "epoch": 0.35, "learning_rate": 0.0001994215129390989, "loss": 1.9038, "step": 295 }, { "epoch": 0.36, "learning_rate": 0.00019941745021174282, "loss": 1.6068, "step": 296 }, { "epoch": 0.36, "learning_rate": 0.00019941337330962693, "loss": 1.8894, "step": 297 }, { "epoch": 0.36, "learning_rate": 0.00019940928223333252, "loss": 2.3158, "step": 298 }, { "epoch": 0.36, "learning_rate": 0.0001994051769834429, "loss": 2.1015, "step": 299 }, { "epoch": 0.36, "learning_rate": 0.00019940105756054337, "loss": 2.1519, "step": 300 }, { "epoch": 0.36, "learning_rate": 0.00019939692396522127, "loss": 1.7233, "step": 301 }, { "epoch": 0.36, "learning_rate": 0.00019939277619806598, "loss": 1.85, "step": 302 }, { "epoch": 0.36, "learning_rate": 0.00019938861425966887, "loss": 2.2368, "step": 303 }, { "epoch": 0.36, "learning_rate": 0.00019938443815062335, "loss": 1.765, "step": 304 }, { "epoch": 0.37, "learning_rate": 0.0001993802478715248, "loss": 1.6333, "step": 305 }, { "epoch": 0.37, "learning_rate": 0.00019937604342297073, "loss": 2.191, "step": 306 }, { "epoch": 0.37, "learning_rate": 0.00019937182480556055, "loss": 2.2402, "step": 307 }, { "epoch": 0.37, "learning_rate": 0.00019936759201989577, "loss": 2.0568, "step": 308 }, { "epoch": 0.37, "learning_rate": 0.0001993633450665799, "loss": 2.4314, "step": 309 }, { "epoch": 0.37, "learning_rate": 0.00019935908394621844, "loss": 2.0556, "step": 310 }, { "epoch": 0.37, "learning_rate": 0.00019935480865941894, "loss": 2.0988, "step": 311 }, { "epoch": 0.37, "learning_rate": 0.00019935051920679094, "loss": 2.0964, "step": 312 }, { "epoch": 0.38, "learning_rate": 0.00019934621558894607, "loss": 1.9365, "step": 313 }, { "epoch": 0.38, "learning_rate": 0.0001993418978064979, "loss": 1.6224, "step": 314 }, { "epoch": 0.38, "learning_rate": 0.00019933756586006202, "loss": 2.144, "step": 315 }, { "epoch": 0.38, "learning_rate": 0.00019933321975025616, "loss": 2.2899, "step": 316 }, { "epoch": 0.38, "learning_rate": 0.00019932885947769992, "loss": 1.8865, "step": 317 }, { "epoch": 0.38, "learning_rate": 0.000199324485043015, "loss": 2.3996, "step": 318 }, { "epoch": 0.38, "learning_rate": 0.0001993200964468251, "loss": 1.3858, "step": 319 }, { "epoch": 0.38, "learning_rate": 0.00019931569368975588, "loss": 2.2231, "step": 320 }, { "epoch": 0.39, "learning_rate": 0.00019931127677243516, "loss": 2.0537, "step": 321 }, { "epoch": 0.39, "learning_rate": 0.00019930684569549264, "loss": 2.1381, "step": 322 }, { "epoch": 0.39, "learning_rate": 0.00019930240045956012, "loss": 2.0152, "step": 323 }, { "epoch": 0.39, "learning_rate": 0.0001992979410652714, "loss": 2.0293, "step": 324 }, { "epoch": 0.39, "learning_rate": 0.00019929346751326228, "loss": 1.7457, "step": 325 }, { "epoch": 0.39, "learning_rate": 0.00019928897980417057, "loss": 1.987, "step": 326 }, { "epoch": 0.39, "learning_rate": 0.00019928447793863616, "loss": 2.2451, "step": 327 }, { "epoch": 0.39, "learning_rate": 0.00019927996191730093, "loss": 2.3312, "step": 328 }, { "epoch": 0.39, "learning_rate": 0.0001992754317408087, "loss": 1.8771, "step": 329 }, { "epoch": 0.4, "learning_rate": 0.0001992708874098054, "loss": 1.833, "step": 330 }, { "epoch": 0.4, "learning_rate": 0.00019926632892493896, "loss": 1.9343, "step": 331 }, { "epoch": 0.4, "learning_rate": 0.00019926175628685937, "loss": 2.2328, "step": 332 }, { "epoch": 0.4, "learning_rate": 0.0001992571694962185, "loss": 1.9916, "step": 333 }, { "epoch": 0.4, "learning_rate": 0.0001992525685536704, "loss": 1.9497, "step": 334 }, { "epoch": 0.4, "learning_rate": 0.000199247953459871, "loss": 2.029, "step": 335 }, { "epoch": 0.4, "learning_rate": 0.00019924332421547835, "loss": 2.0326, "step": 336 }, { "epoch": 0.4, "learning_rate": 0.0001992386808211525, "loss": 2.6406, "step": 337 }, { "epoch": 0.41, "learning_rate": 0.00019923402327755546, "loss": 2.3811, "step": 338 }, { "epoch": 0.41, "learning_rate": 0.00019922935158535129, "loss": 1.6143, "step": 339 }, { "epoch": 0.41, "learning_rate": 0.00019922466574520608, "loss": 2.2182, "step": 340 }, { "epoch": 0.41, "learning_rate": 0.00019921996575778794, "loss": 2.218, "step": 341 }, { "epoch": 0.41, "learning_rate": 0.000199215251623767, "loss": 1.8615, "step": 342 }, { "epoch": 0.41, "learning_rate": 0.00019921052334381534, "loss": 2.165, "step": 343 }, { "epoch": 0.41, "learning_rate": 0.00019920578091860716, "loss": 2.1627, "step": 344 }, { "epoch": 0.41, "learning_rate": 0.0001992010243488186, "loss": 2.154, "step": 345 }, { "epoch": 0.42, "learning_rate": 0.00019919625363512786, "loss": 1.5966, "step": 346 }, { "epoch": 0.42, "learning_rate": 0.00019919146877821512, "loss": 2.0903, "step": 347 }, { "epoch": 0.42, "learning_rate": 0.0001991866697787626, "loss": 2.2322, "step": 348 }, { "epoch": 0.42, "learning_rate": 0.00019918185663745456, "loss": 1.9319, "step": 349 }, { "epoch": 0.42, "learning_rate": 0.00019917702935497725, "loss": 2.1367, "step": 350 }, { "epoch": 0.42, "learning_rate": 0.00019917218793201886, "loss": 2.1767, "step": 351 }, { "epoch": 0.42, "learning_rate": 0.00019916733236926976, "loss": 2.1009, "step": 352 }, { "epoch": 0.42, "learning_rate": 0.0001991624626674222, "loss": 2.1286, "step": 353 }, { "epoch": 0.42, "learning_rate": 0.0001991575788271705, "loss": 2.181, "step": 354 }, { "epoch": 0.43, "learning_rate": 0.00019915268084921101, "loss": 2.12, "step": 355 }, { "epoch": 0.43, "learning_rate": 0.00019914776873424206, "loss": 1.9895, "step": 356 }, { "epoch": 0.43, "learning_rate": 0.000199142842482964, "loss": 1.9285, "step": 357 }, { "epoch": 0.43, "learning_rate": 0.0001991379020960792, "loss": 2.2376, "step": 358 }, { "epoch": 0.43, "learning_rate": 0.0001991329475742921, "loss": 2.1274, "step": 359 }, { "epoch": 0.43, "learning_rate": 0.00019912797891830908, "loss": 2.0043, "step": 360 }, { "epoch": 0.43, "learning_rate": 0.00019912299612883852, "loss": 2.022, "step": 361 }, { "epoch": 0.43, "learning_rate": 0.00019911799920659093, "loss": 1.7343, "step": 362 }, { "epoch": 0.44, "learning_rate": 0.0001991129881522787, "loss": 2.0621, "step": 363 }, { "epoch": 0.44, "learning_rate": 0.00019910796296661632, "loss": 1.5116, "step": 364 }, { "epoch": 0.44, "learning_rate": 0.0001991029236503203, "loss": 2.0485, "step": 365 }, { "epoch": 0.44, "learning_rate": 0.00019909787020410907, "loss": 1.971, "step": 366 }, { "epoch": 0.44, "learning_rate": 0.00019909280262870324, "loss": 1.9724, "step": 367 }, { "epoch": 0.44, "learning_rate": 0.00019908772092482524, "loss": 1.318, "step": 368 }, { "epoch": 0.44, "learning_rate": 0.00019908262509319964, "loss": 2.0539, "step": 369 }, { "epoch": 0.44, "learning_rate": 0.00019907751513455302, "loss": 2.1097, "step": 370 }, { "epoch": 0.45, "learning_rate": 0.00019907239104961392, "loss": 2.0632, "step": 371 }, { "epoch": 0.45, "learning_rate": 0.00019906725283911296, "loss": 2.1897, "step": 372 }, { "epoch": 0.45, "learning_rate": 0.00019906210050378266, "loss": 2.2002, "step": 373 }, { "epoch": 0.45, "learning_rate": 0.00019905693404435773, "loss": 1.9005, "step": 374 }, { "epoch": 0.45, "learning_rate": 0.00019905175346157474, "loss": 1.9873, "step": 375 }, { "epoch": 0.45, "learning_rate": 0.00019904655875617233, "loss": 1.7215, "step": 376 }, { "epoch": 0.45, "learning_rate": 0.00019904134992889113, "loss": 2.0434, "step": 377 }, { "epoch": 0.45, "learning_rate": 0.00019903612698047383, "loss": 2.4223, "step": 378 }, { "epoch": 0.45, "learning_rate": 0.00019903088991166513, "loss": 2.0837, "step": 379 }, { "epoch": 0.46, "learning_rate": 0.00019902563872321172, "loss": 2.2389, "step": 380 }, { "epoch": 0.46, "learning_rate": 0.00019902037341586225, "loss": 1.7205, "step": 381 }, { "epoch": 0.46, "learning_rate": 0.0001990150939903675, "loss": 1.9577, "step": 382 }, { "epoch": 0.46, "learning_rate": 0.00019900980044748015, "loss": 1.8778, "step": 383 }, { "epoch": 0.46, "learning_rate": 0.000199004492787955, "loss": 2.2213, "step": 384 }, { "epoch": 0.46, "learning_rate": 0.00019899917101254874, "loss": 2.0927, "step": 385 }, { "epoch": 0.46, "learning_rate": 0.00019899383512202019, "loss": 2.2921, "step": 386 }, { "epoch": 0.46, "learning_rate": 0.0001989884851171301, "loss": 2.2983, "step": 387 }, { "epoch": 0.47, "learning_rate": 0.0001989831209986413, "loss": 1.8052, "step": 388 }, { "epoch": 0.47, "learning_rate": 0.00019897774276731857, "loss": 1.7741, "step": 389 }, { "epoch": 0.47, "learning_rate": 0.00019897235042392873, "loss": 1.779, "step": 390 }, { "epoch": 0.47, "learning_rate": 0.00019896694396924063, "loss": 1.6924, "step": 391 }, { "epoch": 0.47, "learning_rate": 0.00019896152340402509, "loss": 2.036, "step": 392 }, { "epoch": 0.47, "learning_rate": 0.00019895608872905494, "loss": 2.04, "step": 393 }, { "epoch": 0.47, "learning_rate": 0.0001989506399451051, "loss": 2.1702, "step": 394 }, { "epoch": 0.47, "learning_rate": 0.00019894517705295245, "loss": 1.9429, "step": 395 }, { "epoch": 0.48, "learning_rate": 0.00019893970005337584, "loss": 2.0528, "step": 396 }, { "epoch": 0.48, "learning_rate": 0.00019893420894715618, "loss": 1.7906, "step": 397 }, { "epoch": 0.48, "learning_rate": 0.0001989287037350764, "loss": 2.3494, "step": 398 }, { "epoch": 0.48, "learning_rate": 0.00019892318441792138, "loss": 1.7415, "step": 399 }, { "epoch": 0.48, "learning_rate": 0.0001989176509964781, "loss": 2.0184, "step": 400 }, { "epoch": 0.48, "learning_rate": 0.0001989121034715355, "loss": 1.9277, "step": 401 }, { "epoch": 0.48, "learning_rate": 0.0001989065418438845, "loss": 2.2168, "step": 402 }, { "epoch": 0.48, "learning_rate": 0.00019890096611431814, "loss": 2.6114, "step": 403 }, { "epoch": 0.48, "learning_rate": 0.00019889537628363133, "loss": 2.0713, "step": 404 }, { "epoch": 0.49, "learning_rate": 0.00019888977235262104, "loss": 2.2966, "step": 405 }, { "epoch": 0.49, "learning_rate": 0.00019888415432208636, "loss": 2.5206, "step": 406 }, { "epoch": 0.49, "learning_rate": 0.00019887852219282822, "loss": 2.4503, "step": 407 }, { "epoch": 0.49, "learning_rate": 0.00019887287596564966, "loss": 2.102, "step": 408 }, { "epoch": 0.49, "learning_rate": 0.00019886721564135572, "loss": 2.3275, "step": 409 }, { "epoch": 0.49, "learning_rate": 0.00019886154122075343, "loss": 2.0481, "step": 410 }, { "epoch": 0.49, "learning_rate": 0.00019885585270465182, "loss": 1.8395, "step": 411 }, { "epoch": 0.49, "learning_rate": 0.00019885015009386202, "loss": 2.3535, "step": 412 }, { "epoch": 0.5, "learning_rate": 0.000198844433389197, "loss": 2.0147, "step": 413 }, { "epoch": 0.5, "learning_rate": 0.0001988387025914719, "loss": 2.1919, "step": 414 }, { "epoch": 0.5, "learning_rate": 0.0001988329577015038, "loss": 2.156, "step": 415 }, { "epoch": 0.5, "learning_rate": 0.00019882719872011176, "loss": 2.2672, "step": 416 }, { "epoch": 0.5, "learning_rate": 0.00019882142564811694, "loss": 2.3242, "step": 417 }, { "epoch": 0.5, "learning_rate": 0.0001988156384863424, "loss": 2.0259, "step": 418 }, { "epoch": 0.5, "eval_loss": 1.9941134452819824, "eval_runtime": 282.533, "eval_samples_per_second": 0.729, "eval_steps_per_second": 0.729, "step": 418 }, { "epoch": 0.5, "learning_rate": 0.00019880983723561332, "loss": 1.7039, "step": 419 }, { "epoch": 0.5, "learning_rate": 0.00019880402189675678, "loss": 2.1007, "step": 420 }, { "epoch": 0.51, "learning_rate": 0.00019879819247060193, "loss": 2.2297, "step": 421 }, { "epoch": 0.51, "learning_rate": 0.00019879234895797996, "loss": 1.6166, "step": 422 }, { "epoch": 0.51, "learning_rate": 0.000198786491359724, "loss": 2.408, "step": 423 }, { "epoch": 0.51, "learning_rate": 0.00019878061967666915, "loss": 1.686, "step": 424 }, { "epoch": 0.51, "learning_rate": 0.0001987747339096527, "loss": 2.0492, "step": 425 }, { "epoch": 0.51, "learning_rate": 0.00019876883405951377, "loss": 2.2179, "step": 426 }, { "epoch": 0.51, "learning_rate": 0.00019876292012709356, "loss": 1.8812, "step": 427 }, { "epoch": 0.51, "learning_rate": 0.00019875699211323528, "loss": 2.2888, "step": 428 }, { "epoch": 0.52, "learning_rate": 0.00019875105001878409, "loss": 2.0561, "step": 429 }, { "epoch": 0.52, "learning_rate": 0.00019874509384458725, "loss": 1.9299, "step": 430 }, { "epoch": 0.52, "learning_rate": 0.00019873912359149397, "loss": 2.1999, "step": 431 }, { "epoch": 0.52, "learning_rate": 0.00019873313926035548, "loss": 1.8509, "step": 432 }, { "epoch": 0.52, "learning_rate": 0.00019872714085202503, "loss": 1.8281, "step": 433 }, { "epoch": 0.52, "learning_rate": 0.0001987211283673578, "loss": 1.8359, "step": 434 }, { "epoch": 0.52, "learning_rate": 0.0001987151018072111, "loss": 2.2844, "step": 435 }, { "epoch": 0.52, "learning_rate": 0.00019870906117244416, "loss": 1.9397, "step": 436 }, { "epoch": 0.52, "learning_rate": 0.00019870300646391824, "loss": 2.302, "step": 437 }, { "epoch": 0.53, "learning_rate": 0.00019869693768249661, "loss": 2.1176, "step": 438 }, { "epoch": 0.53, "learning_rate": 0.00019869085482904458, "loss": 2.1909, "step": 439 }, { "epoch": 0.53, "learning_rate": 0.0001986847579044294, "loss": 2.2382, "step": 440 }, { "epoch": 0.53, "learning_rate": 0.00019867864690952035, "loss": 2.0988, "step": 441 }, { "epoch": 0.53, "learning_rate": 0.00019867252184518878, "loss": 2.2136, "step": 442 }, { "epoch": 0.53, "learning_rate": 0.0001986663827123079, "loss": 1.9324, "step": 443 }, { "epoch": 0.53, "learning_rate": 0.00019866022951175308, "loss": 2.1274, "step": 444 }, { "epoch": 0.53, "learning_rate": 0.00019865406224440165, "loss": 1.8625, "step": 445 }, { "epoch": 0.54, "learning_rate": 0.00019864788091113287, "loss": 2.0009, "step": 446 }, { "epoch": 0.54, "learning_rate": 0.0001986416855128281, "loss": 2.2245, "step": 447 }, { "epoch": 0.54, "learning_rate": 0.00019863547605037063, "loss": 2.0654, "step": 448 }, { "epoch": 0.54, "learning_rate": 0.00019862925252464586, "loss": 1.4339, "step": 449 }, { "epoch": 0.54, "learning_rate": 0.00019862301493654108, "loss": 2.1347, "step": 450 }, { "epoch": 0.54, "learning_rate": 0.00019861676328694562, "loss": 1.7029, "step": 451 }, { "epoch": 0.54, "learning_rate": 0.00019861049757675088, "loss": 2.0081, "step": 452 }, { "epoch": 0.54, "learning_rate": 0.00019860421780685018, "loss": 1.9994, "step": 453 }, { "epoch": 0.55, "learning_rate": 0.0001985979239781389, "loss": 1.9325, "step": 454 }, { "epoch": 0.55, "learning_rate": 0.00019859161609151436, "loss": 1.8502, "step": 455 }, { "epoch": 0.55, "learning_rate": 0.000198585294147876, "loss": 2.3779, "step": 456 }, { "epoch": 0.55, "learning_rate": 0.00019857895814812509, "loss": 2.0303, "step": 457 }, { "epoch": 0.55, "learning_rate": 0.0001985726080931651, "loss": 1.9898, "step": 458 }, { "epoch": 0.55, "learning_rate": 0.00019856624398390137, "loss": 1.7648, "step": 459 }, { "epoch": 0.55, "learning_rate": 0.00019855986582124126, "loss": 1.7822, "step": 460 }, { "epoch": 0.55, "learning_rate": 0.0001985534736060942, "loss": 1.9219, "step": 461 }, { "epoch": 0.55, "learning_rate": 0.00019854706733937155, "loss": 2.1789, "step": 462 }, { "epoch": 0.56, "learning_rate": 0.00019854064702198675, "loss": 1.9091, "step": 463 }, { "epoch": 0.56, "learning_rate": 0.00019853421265485514, "loss": 1.9941, "step": 464 }, { "epoch": 0.56, "learning_rate": 0.0001985277642388941, "loss": 1.904, "step": 465 }, { "epoch": 0.56, "learning_rate": 0.00019852130177502316, "loss": 1.6299, "step": 466 }, { "epoch": 0.56, "learning_rate": 0.0001985148252641636, "loss": 1.7712, "step": 467 }, { "epoch": 0.56, "learning_rate": 0.00019850833470723886, "loss": 1.6825, "step": 468 }, { "epoch": 0.56, "learning_rate": 0.0001985018301051744, "loss": 1.7408, "step": 469 }, { "epoch": 0.56, "learning_rate": 0.00019849531145889758, "loss": 2.0622, "step": 470 }, { "epoch": 0.57, "learning_rate": 0.00019848877876933784, "loss": 1.5699, "step": 471 }, { "epoch": 0.57, "learning_rate": 0.0001984822320374266, "loss": 2.0253, "step": 472 }, { "epoch": 0.57, "learning_rate": 0.00019847567126409724, "loss": 2.2186, "step": 473 }, { "epoch": 0.57, "learning_rate": 0.00019846909645028523, "loss": 2.0872, "step": 474 }, { "epoch": 0.57, "learning_rate": 0.000198462507596928, "loss": 1.9362, "step": 475 }, { "epoch": 0.57, "learning_rate": 0.00019845590470496497, "loss": 2.4109, "step": 476 }, { "epoch": 0.57, "learning_rate": 0.00019844928777533753, "loss": 2.2626, "step": 477 }, { "epoch": 0.57, "learning_rate": 0.00019844265680898918, "loss": 2.0874, "step": 478 }, { "epoch": 0.58, "learning_rate": 0.0001984360118068653, "loss": 2.1606, "step": 479 }, { "epoch": 0.58, "learning_rate": 0.0001984293527699133, "loss": 2.063, "step": 480 }, { "epoch": 0.58, "learning_rate": 0.00019842267969908265, "loss": 1.9065, "step": 481 }, { "epoch": 0.58, "learning_rate": 0.0001984159925953248, "loss": 1.9511, "step": 482 }, { "epoch": 0.58, "learning_rate": 0.00019840929145959317, "loss": 2.056, "step": 483 }, { "epoch": 0.58, "learning_rate": 0.00019840257629284317, "loss": 2.2353, "step": 484 }, { "epoch": 0.58, "learning_rate": 0.00019839584709603226, "loss": 1.9401, "step": 485 }, { "epoch": 0.58, "learning_rate": 0.0001983891038701199, "loss": 1.9648, "step": 486 }, { "epoch": 0.58, "learning_rate": 0.00019838234661606748, "loss": 1.753, "step": 487 }, { "epoch": 0.59, "learning_rate": 0.00019837557533483846, "loss": 1.7805, "step": 488 }, { "epoch": 0.59, "learning_rate": 0.00019836879002739827, "loss": 2.192, "step": 489 }, { "epoch": 0.59, "learning_rate": 0.00019836199069471437, "loss": 1.9112, "step": 490 }, { "epoch": 0.59, "learning_rate": 0.00019835517733775615, "loss": 2.0119, "step": 491 }, { "epoch": 0.59, "learning_rate": 0.0001983483499574951, "loss": 1.8932, "step": 492 }, { "epoch": 0.59, "learning_rate": 0.00019834150855490464, "loss": 1.5968, "step": 493 }, { "epoch": 0.59, "learning_rate": 0.00019833465313096017, "loss": 2.1493, "step": 494 }, { "epoch": 0.59, "learning_rate": 0.00019832778368663917, "loss": 1.8863, "step": 495 }, { "epoch": 0.6, "learning_rate": 0.000198320900222921, "loss": 2.2134, "step": 496 }, { "epoch": 0.6, "learning_rate": 0.00019831400274078717, "loss": 2.2831, "step": 497 }, { "epoch": 0.6, "learning_rate": 0.00019830709124122112, "loss": 2.0266, "step": 498 }, { "epoch": 0.6, "learning_rate": 0.0001983001657252082, "loss": 2.3392, "step": 499 }, { "epoch": 0.6, "learning_rate": 0.00019829322619373588, "loss": 1.8426, "step": 500 }, { "epoch": 0.6, "learning_rate": 0.00019828627264779363, "loss": 2.0742, "step": 501 }, { "epoch": 0.6, "learning_rate": 0.0001982793050883728, "loss": 1.9578, "step": 502 }, { "epoch": 0.6, "learning_rate": 0.00019827232351646686, "loss": 2.0863, "step": 503 }, { "epoch": 0.61, "learning_rate": 0.0001982653279330712, "loss": 2.2881, "step": 504 }, { "epoch": 0.61, "learning_rate": 0.00019825831833918323, "loss": 1.8869, "step": 505 }, { "epoch": 0.61, "learning_rate": 0.0001982512947358024, "loss": 1.8997, "step": 506 }, { "epoch": 0.61, "learning_rate": 0.00019824425712393012, "loss": 1.8945, "step": 507 }, { "epoch": 0.61, "learning_rate": 0.00019823720550456977, "loss": 1.9496, "step": 508 }, { "epoch": 0.61, "learning_rate": 0.0001982301398787268, "loss": 2.1066, "step": 509 }, { "epoch": 0.61, "learning_rate": 0.00019822306024740852, "loss": 1.958, "step": 510 }, { "epoch": 0.61, "learning_rate": 0.00019821596661162447, "loss": 2.1112, "step": 511 }, { "epoch": 0.61, "learning_rate": 0.00019820885897238596, "loss": 2.1012, "step": 512 }, { "epoch": 0.62, "learning_rate": 0.0001982017373307064, "loss": 2.2623, "step": 513 }, { "epoch": 0.62, "learning_rate": 0.00019819460168760117, "loss": 2.5058, "step": 514 }, { "epoch": 0.62, "learning_rate": 0.0001981874520440877, "loss": 2.1367, "step": 515 }, { "epoch": 0.62, "learning_rate": 0.00019818028840118532, "loss": 2.2743, "step": 516 }, { "epoch": 0.62, "learning_rate": 0.00019817311075991543, "loss": 1.5517, "step": 517 }, { "epoch": 0.62, "learning_rate": 0.0001981659191213014, "loss": 1.9569, "step": 518 }, { "epoch": 0.62, "learning_rate": 0.00019815871348636863, "loss": 2.0566, "step": 519 }, { "epoch": 0.62, "learning_rate": 0.00019815149385614444, "loss": 1.8859, "step": 520 }, { "epoch": 0.63, "learning_rate": 0.00019814426023165825, "loss": 2.0298, "step": 521 }, { "epoch": 0.63, "learning_rate": 0.00019813701261394136, "loss": 2.0614, "step": 522 }, { "epoch": 0.63, "learning_rate": 0.00019812975100402715, "loss": 2.221, "step": 523 }, { "epoch": 0.63, "learning_rate": 0.00019812247540295096, "loss": 2.1255, "step": 524 }, { "epoch": 0.63, "learning_rate": 0.00019811518581175014, "loss": 2.1885, "step": 525 }, { "epoch": 0.63, "learning_rate": 0.000198107882231464, "loss": 2.3918, "step": 526 }, { "epoch": 0.63, "learning_rate": 0.00019810056466313392, "loss": 2.2759, "step": 527 }, { "epoch": 0.63, "learning_rate": 0.00019809323310780318, "loss": 1.9727, "step": 528 }, { "epoch": 0.64, "learning_rate": 0.0001980858875665171, "loss": 2.0417, "step": 529 }, { "epoch": 0.64, "learning_rate": 0.00019807852804032305, "loss": 1.645, "step": 530 }, { "epoch": 0.64, "learning_rate": 0.0001980711545302703, "loss": 1.7943, "step": 531 }, { "epoch": 0.64, "learning_rate": 0.00019806376703741015, "loss": 1.8844, "step": 532 }, { "epoch": 0.64, "learning_rate": 0.00019805636556279588, "loss": 2.1128, "step": 533 }, { "epoch": 0.64, "learning_rate": 0.0001980489501074828, "loss": 2.0272, "step": 534 }, { "epoch": 0.64, "learning_rate": 0.00019804152067252816, "loss": 2.0916, "step": 535 }, { "epoch": 0.64, "learning_rate": 0.00019803407725899131, "loss": 1.7287, "step": 536 }, { "epoch": 0.64, "learning_rate": 0.00019802661986793342, "loss": 2.0667, "step": 537 }, { "epoch": 0.65, "learning_rate": 0.00019801914850041784, "loss": 2.4016, "step": 538 }, { "epoch": 0.65, "learning_rate": 0.00019801166315750978, "loss": 1.8557, "step": 539 }, { "epoch": 0.65, "learning_rate": 0.0001980041638402765, "loss": 1.8072, "step": 540 }, { "epoch": 0.65, "learning_rate": 0.00019799665054978722, "loss": 2.2252, "step": 541 }, { "epoch": 0.65, "learning_rate": 0.00019798912328711322, "loss": 2.1377, "step": 542 }, { "epoch": 0.65, "learning_rate": 0.00019798158205332764, "loss": 2.0306, "step": 543 }, { "epoch": 0.65, "learning_rate": 0.00019797402684950576, "loss": 1.7428, "step": 544 }, { "epoch": 0.65, "learning_rate": 0.00019796645767672477, "loss": 2.0843, "step": 545 }, { "epoch": 0.66, "learning_rate": 0.00019795887453606388, "loss": 1.9175, "step": 546 }, { "epoch": 0.66, "learning_rate": 0.00019795127742860423, "loss": 1.6673, "step": 547 }, { "epoch": 0.66, "learning_rate": 0.0001979436663554291, "loss": 1.5553, "step": 548 }, { "epoch": 0.66, "learning_rate": 0.00019793604131762357, "loss": 1.604, "step": 549 }, { "epoch": 0.66, "learning_rate": 0.00019792840231627482, "loss": 2.023, "step": 550 }, { "epoch": 0.66, "learning_rate": 0.00019792074935247206, "loss": 1.8399, "step": 551 }, { "epoch": 0.66, "learning_rate": 0.00019791308242730638, "loss": 1.8579, "step": 552 }, { "epoch": 0.66, "learning_rate": 0.00019790540154187094, "loss": 2.2135, "step": 553 }, { "epoch": 0.67, "learning_rate": 0.00019789770669726087, "loss": 1.7894, "step": 554 }, { "epoch": 0.67, "learning_rate": 0.00019788999789457326, "loss": 2.1723, "step": 555 }, { "epoch": 0.67, "learning_rate": 0.00019788227513490723, "loss": 2.0881, "step": 556 }, { "epoch": 0.67, "learning_rate": 0.00019787453841936393, "loss": 1.7181, "step": 557 }, { "epoch": 0.67, "learning_rate": 0.00019786678774904638, "loss": 1.8725, "step": 558 }, { "epoch": 0.67, "learning_rate": 0.00019785902312505964, "loss": 2.0544, "step": 559 }, { "epoch": 0.67, "learning_rate": 0.00019785124454851084, "loss": 1.7503, "step": 560 }, { "epoch": 0.67, "learning_rate": 0.000197843452020509, "loss": 2.01, "step": 561 }, { "epoch": 0.67, "learning_rate": 0.00019783564554216518, "loss": 1.748, "step": 562 }, { "epoch": 0.68, "learning_rate": 0.0001978278251145924, "loss": 2.0866, "step": 563 }, { "epoch": 0.68, "learning_rate": 0.0001978199907389057, "loss": 1.6046, "step": 564 }, { "epoch": 0.68, "learning_rate": 0.00019781214241622208, "loss": 1.9222, "step": 565 }, { "epoch": 0.68, "learning_rate": 0.00019780428014766051, "loss": 2.2003, "step": 566 }, { "epoch": 0.68, "learning_rate": 0.00019779640393434206, "loss": 2.0534, "step": 567 }, { "epoch": 0.68, "learning_rate": 0.0001977885137773896, "loss": 1.8609, "step": 568 }, { "epoch": 0.68, "learning_rate": 0.00019778060967792817, "loss": 2.0666, "step": 569 }, { "epoch": 0.68, "learning_rate": 0.00019777269163708468, "loss": 1.9512, "step": 570 }, { "epoch": 0.69, "learning_rate": 0.00019776475965598814, "loss": 1.8349, "step": 571 }, { "epoch": 0.69, "learning_rate": 0.0001977568137357694, "loss": 2.0507, "step": 572 }, { "epoch": 0.69, "learning_rate": 0.00019774885387756138, "loss": 1.7588, "step": 573 }, { "epoch": 0.69, "learning_rate": 0.000197740880082499, "loss": 2.0981, "step": 574 }, { "epoch": 0.69, "learning_rate": 0.00019773289235171918, "loss": 2.0953, "step": 575 }, { "epoch": 0.69, "learning_rate": 0.00019772489068636077, "loss": 2.0678, "step": 576 }, { "epoch": 0.69, "learning_rate": 0.00019771687508756466, "loss": 2.0136, "step": 577 }, { "epoch": 0.69, "learning_rate": 0.0001977088455564736, "loss": 1.9781, "step": 578 }, { "epoch": 0.7, "learning_rate": 0.00019770080209423254, "loss": 2.2185, "step": 579 }, { "epoch": 0.7, "learning_rate": 0.00019769274470198827, "loss": 1.8076, "step": 580 }, { "epoch": 0.7, "learning_rate": 0.00019768467338088957, "loss": 1.6888, "step": 581 }, { "epoch": 0.7, "learning_rate": 0.00019767658813208726, "loss": 2.1273, "step": 582 }, { "epoch": 0.7, "learning_rate": 0.0001976684889567341, "loss": 2.3232, "step": 583 }, { "epoch": 0.7, "learning_rate": 0.00019766037585598487, "loss": 2.366, "step": 584 }, { "epoch": 0.7, "learning_rate": 0.00019765224883099635, "loss": 1.8939, "step": 585 }, { "epoch": 0.7, "learning_rate": 0.00019764410788292722, "loss": 2.0162, "step": 586 }, { "epoch": 0.7, "learning_rate": 0.00019763595301293822, "loss": 2.2752, "step": 587 }, { "epoch": 0.71, "learning_rate": 0.0001976277842221921, "loss": 1.9461, "step": 588 }, { "epoch": 0.71, "learning_rate": 0.0001976196015118535, "loss": 1.9999, "step": 589 }, { "epoch": 0.71, "learning_rate": 0.0001976114048830891, "loss": 2.0169, "step": 590 }, { "epoch": 0.71, "learning_rate": 0.00019760319433706757, "loss": 2.1838, "step": 591 }, { "epoch": 0.71, "learning_rate": 0.00019759496987495955, "loss": 2.3513, "step": 592 }, { "epoch": 0.71, "learning_rate": 0.0001975867314979377, "loss": 1.9915, "step": 593 }, { "epoch": 0.71, "learning_rate": 0.0001975784792071766, "loss": 2.1973, "step": 594 }, { "epoch": 0.71, "learning_rate": 0.00019757021300385286, "loss": 2.3112, "step": 595 }, { "epoch": 0.72, "learning_rate": 0.00019756193288914507, "loss": 2.0992, "step": 596 }, { "epoch": 0.72, "learning_rate": 0.00019755363886423376, "loss": 2.4266, "step": 597 }, { "epoch": 0.72, "learning_rate": 0.00019754533093030148, "loss": 1.7649, "step": 598 }, { "epoch": 0.72, "learning_rate": 0.0001975370090885328, "loss": 1.7573, "step": 599 }, { "epoch": 0.72, "learning_rate": 0.00019752867334011423, "loss": 1.7949, "step": 600 }, { "epoch": 0.72, "learning_rate": 0.0001975203236862342, "loss": 2.0229, "step": 601 }, { "epoch": 0.72, "learning_rate": 0.00019751196012808325, "loss": 2.0519, "step": 602 }, { "epoch": 0.72, "learning_rate": 0.00019750358266685383, "loss": 2.0829, "step": 603 }, { "epoch": 0.73, "learning_rate": 0.00019749519130374038, "loss": 2.0153, "step": 604 }, { "epoch": 0.73, "learning_rate": 0.00019748678603993933, "loss": 1.8594, "step": 605 }, { "epoch": 0.73, "learning_rate": 0.00019747836687664908, "loss": 2.1385, "step": 606 }, { "epoch": 0.73, "learning_rate": 0.00019746993381507003, "loss": 2.1317, "step": 607 }, { "epoch": 0.73, "learning_rate": 0.00019746148685640451, "loss": 1.1676, "step": 608 }, { "epoch": 0.73, "learning_rate": 0.0001974530260018569, "loss": 2.2856, "step": 609 }, { "epoch": 0.73, "learning_rate": 0.0001974445512526336, "loss": 2.1973, "step": 610 }, { "epoch": 0.73, "learning_rate": 0.00019743606260994278, "loss": 1.6912, "step": 611 }, { "epoch": 0.73, "learning_rate": 0.00019742756007499486, "loss": 1.8091, "step": 612 }, { "epoch": 0.74, "learning_rate": 0.00019741904364900208, "loss": 2.0108, "step": 613 }, { "epoch": 0.74, "learning_rate": 0.00019741051333317867, "loss": 2.1061, "step": 614 }, { "epoch": 0.74, "learning_rate": 0.00019740196912874087, "loss": 1.8934, "step": 615 }, { "epoch": 0.74, "learning_rate": 0.00019739341103690693, "loss": 1.8599, "step": 616 }, { "epoch": 0.74, "learning_rate": 0.00019738483905889703, "loss": 2.0025, "step": 617 }, { "epoch": 0.74, "learning_rate": 0.00019737625319593335, "loss": 1.8247, "step": 618 }, { "epoch": 0.74, "learning_rate": 0.00019736765344924005, "loss": 2.222, "step": 619 }, { "epoch": 0.74, "learning_rate": 0.00019735903982004324, "loss": 2.116, "step": 620 }, { "epoch": 0.75, "learning_rate": 0.0001973504123095711, "loss": 1.9183, "step": 621 }, { "epoch": 0.75, "learning_rate": 0.0001973417709190536, "loss": 2.1507, "step": 622 }, { "epoch": 0.75, "learning_rate": 0.00019733311564972296, "loss": 1.7899, "step": 623 }, { "epoch": 0.75, "learning_rate": 0.00019732444650281315, "loss": 2.1005, "step": 624 }, { "epoch": 0.75, "learning_rate": 0.0001973157634795602, "loss": 2.2391, "step": 625 }, { "epoch": 0.75, "learning_rate": 0.00019730706658120214, "loss": 1.9466, "step": 626 }, { "epoch": 0.75, "learning_rate": 0.000197298355808979, "loss": 1.9854, "step": 627 }, { "epoch": 0.75, "eval_loss": 1.9957869052886963, "eval_runtime": 282.5544, "eval_samples_per_second": 0.729, "eval_steps_per_second": 0.729, "step": 627 }, { "epoch": 0.75, "learning_rate": 0.00019728963116413266, "loss": 2.1877, "step": 628 }, { "epoch": 0.76, "learning_rate": 0.00019728089264790712, "loss": 2.2194, "step": 629 }, { "epoch": 0.76, "learning_rate": 0.00019727214026154827, "loss": 1.9631, "step": 630 }, { "epoch": 0.76, "learning_rate": 0.00019726337400630405, "loss": 2.3506, "step": 631 }, { "epoch": 0.76, "learning_rate": 0.00019725459388342432, "loss": 2.0543, "step": 632 }, { "epoch": 0.76, "learning_rate": 0.0001972457998941609, "loss": 2.0402, "step": 633 }, { "epoch": 0.76, "learning_rate": 0.00019723699203976766, "loss": 1.9316, "step": 634 }, { "epoch": 0.76, "learning_rate": 0.0001972281703215004, "loss": 2.2024, "step": 635 }, { "epoch": 0.76, "learning_rate": 0.00019721933474061692, "loss": 1.6776, "step": 636 }, { "epoch": 0.76, "learning_rate": 0.00019721048529837694, "loss": 1.9757, "step": 637 }, { "epoch": 0.77, "learning_rate": 0.00019720162199604222, "loss": 1.7631, "step": 638 }, { "epoch": 0.77, "learning_rate": 0.00019719274483487648, "loss": 2.34, "step": 639 }, { "epoch": 0.77, "learning_rate": 0.0001971838538161454, "loss": 1.8469, "step": 640 }, { "epoch": 0.77, "learning_rate": 0.00019717494894111662, "loss": 2.3151, "step": 641 }, { "epoch": 0.77, "learning_rate": 0.00019716603021105987, "loss": 2.0661, "step": 642 }, { "epoch": 0.77, "learning_rate": 0.00019715709762724667, "loss": 2.0408, "step": 643 }, { "epoch": 0.77, "learning_rate": 0.00019714815119095062, "loss": 1.9848, "step": 644 }, { "epoch": 0.77, "learning_rate": 0.00019713919090344736, "loss": 2.3134, "step": 645 }, { "epoch": 0.78, "learning_rate": 0.00019713021676601438, "loss": 2.4947, "step": 646 }, { "epoch": 0.78, "learning_rate": 0.0001971212287799312, "loss": 2.0515, "step": 647 }, { "epoch": 0.78, "learning_rate": 0.00019711222694647932, "loss": 2.6216, "step": 648 }, { "epoch": 0.78, "learning_rate": 0.00019710321126694216, "loss": 1.6517, "step": 649 }, { "epoch": 0.78, "learning_rate": 0.0001970941817426052, "loss": 2.0408, "step": 650 }, { "epoch": 0.78, "learning_rate": 0.00019708513837475588, "loss": 1.8841, "step": 651 }, { "epoch": 0.78, "learning_rate": 0.00019707608116468356, "loss": 2.1966, "step": 652 }, { "epoch": 0.78, "learning_rate": 0.00019706701011367955, "loss": 1.7587, "step": 653 }, { "epoch": 0.79, "learning_rate": 0.0001970579252230373, "loss": 2.2196, "step": 654 }, { "epoch": 0.79, "learning_rate": 0.00019704882649405198, "loss": 1.8146, "step": 655 }, { "epoch": 0.79, "learning_rate": 0.00019703971392802098, "loss": 2.2932, "step": 656 }, { "epoch": 0.79, "learning_rate": 0.00019703058752624353, "loss": 1.923, "step": 657 }, { "epoch": 0.79, "learning_rate": 0.0001970214472900208, "loss": 2.2393, "step": 658 }, { "epoch": 0.79, "learning_rate": 0.00019701229322065605, "loss": 1.7338, "step": 659 }, { "epoch": 0.79, "learning_rate": 0.00019700312531945442, "loss": 1.7859, "step": 660 }, { "epoch": 0.79, "learning_rate": 0.00019699394358772306, "loss": 2.2719, "step": 661 }, { "epoch": 0.79, "learning_rate": 0.00019698474802677107, "loss": 1.576, "step": 662 }, { "epoch": 0.8, "learning_rate": 0.00019697553863790956, "loss": 2.3333, "step": 663 }, { "epoch": 0.8, "learning_rate": 0.00019696631542245156, "loss": 2.3508, "step": 664 }, { "epoch": 0.8, "learning_rate": 0.00019695707838171216, "loss": 2.1876, "step": 665 }, { "epoch": 0.8, "learning_rate": 0.00019694782751700828, "loss": 1.4863, "step": 666 }, { "epoch": 0.8, "learning_rate": 0.00019693856282965898, "loss": 1.8948, "step": 667 }, { "epoch": 0.8, "learning_rate": 0.00019692928432098512, "loss": 1.6867, "step": 668 }, { "epoch": 0.8, "learning_rate": 0.00019691999199230963, "loss": 1.7682, "step": 669 }, { "epoch": 0.8, "learning_rate": 0.00019691068584495742, "loss": 2.0914, "step": 670 }, { "epoch": 0.81, "learning_rate": 0.00019690136588025535, "loss": 2.1413, "step": 671 }, { "epoch": 0.81, "learning_rate": 0.00019689203209953223, "loss": 2.1275, "step": 672 }, { "epoch": 0.81, "learning_rate": 0.0001968826845041188, "loss": 1.9556, "step": 673 }, { "epoch": 0.81, "learning_rate": 0.00019687332309534792, "loss": 2.2209, "step": 674 }, { "epoch": 0.81, "learning_rate": 0.00019686394787455424, "loss": 1.9853, "step": 675 }, { "epoch": 0.81, "learning_rate": 0.00019685455884307454, "loss": 2.0877, "step": 676 }, { "epoch": 0.81, "learning_rate": 0.00019684515600224743, "loss": 2.1607, "step": 677 }, { "epoch": 0.81, "learning_rate": 0.00019683573935341358, "loss": 2.2664, "step": 678 }, { "epoch": 0.82, "learning_rate": 0.00019682630889791556, "loss": 1.8527, "step": 679 }, { "epoch": 0.82, "learning_rate": 0.000196816864637098, "loss": 1.8417, "step": 680 }, { "epoch": 0.82, "learning_rate": 0.00019680740657230738, "loss": 1.9853, "step": 681 }, { "epoch": 0.82, "learning_rate": 0.00019679793470489228, "loss": 1.8419, "step": 682 }, { "epoch": 0.82, "learning_rate": 0.00019678844903620317, "loss": 1.9971, "step": 683 }, { "epoch": 0.82, "learning_rate": 0.00019677894956759246, "loss": 1.9843, "step": 684 }, { "epoch": 0.82, "learning_rate": 0.00019676943630041462, "loss": 2.376, "step": 685 }, { "epoch": 0.82, "learning_rate": 0.00019675990923602598, "loss": 2.1558, "step": 686 }, { "epoch": 0.82, "learning_rate": 0.00019675036837578494, "loss": 1.5752, "step": 687 }, { "epoch": 0.83, "learning_rate": 0.0001967408137210518, "loss": 1.6704, "step": 688 }, { "epoch": 0.83, "learning_rate": 0.00019673124527318881, "loss": 2.1389, "step": 689 }, { "epoch": 0.83, "learning_rate": 0.00019672166303356028, "loss": 2.126, "step": 690 }, { "epoch": 0.83, "learning_rate": 0.00019671206700353237, "loss": 1.9402, "step": 691 }, { "epoch": 0.83, "learning_rate": 0.00019670245718447335, "loss": 1.6701, "step": 692 }, { "epoch": 0.83, "learning_rate": 0.00019669283357775328, "loss": 1.8134, "step": 693 }, { "epoch": 0.83, "learning_rate": 0.0001966831961847443, "loss": 2.1642, "step": 694 }, { "epoch": 0.83, "learning_rate": 0.00019667354500682054, "loss": 1.8455, "step": 695 }, { "epoch": 0.84, "learning_rate": 0.000196663880045358, "loss": 1.9646, "step": 696 }, { "epoch": 0.84, "learning_rate": 0.0001966542013017347, "loss": 1.9855, "step": 697 }, { "epoch": 0.84, "learning_rate": 0.00019664450877733062, "loss": 1.7029, "step": 698 }, { "epoch": 0.84, "learning_rate": 0.00019663480247352773, "loss": 1.9789, "step": 699 }, { "epoch": 0.84, "learning_rate": 0.0001966250823917099, "loss": 1.8751, "step": 700 }, { "epoch": 0.84, "learning_rate": 0.00019661534853326301, "loss": 2.3644, "step": 701 }, { "epoch": 0.84, "learning_rate": 0.00019660560089957492, "loss": 1.8006, "step": 702 }, { "epoch": 0.84, "learning_rate": 0.0001965958394920354, "loss": 2.2799, "step": 703 }, { "epoch": 0.85, "learning_rate": 0.00019658606431203622, "loss": 1.9258, "step": 704 }, { "epoch": 0.85, "learning_rate": 0.0001965762753609711, "loss": 1.9521, "step": 705 }, { "epoch": 0.85, "learning_rate": 0.00019656647264023575, "loss": 1.9675, "step": 706 }, { "epoch": 0.85, "learning_rate": 0.00019655665615122783, "loss": 2.3686, "step": 707 }, { "epoch": 0.85, "learning_rate": 0.00019654682589534693, "loss": 2.1448, "step": 708 }, { "epoch": 0.85, "learning_rate": 0.00019653698187399466, "loss": 2.2475, "step": 709 }, { "epoch": 0.85, "learning_rate": 0.0001965271240885745, "loss": 1.9417, "step": 710 }, { "epoch": 0.85, "learning_rate": 0.0001965172525404921, "loss": 2.154, "step": 711 }, { "epoch": 0.85, "learning_rate": 0.00019650736723115475, "loss": 2.0646, "step": 712 }, { "epoch": 0.86, "learning_rate": 0.00019649746816197196, "loss": 2.235, "step": 713 }, { "epoch": 0.86, "learning_rate": 0.00019648755533435518, "loss": 1.7122, "step": 714 }, { "epoch": 0.86, "learning_rate": 0.00019647762874971765, "loss": 2.0635, "step": 715 }, { "epoch": 0.86, "learning_rate": 0.00019646768840947474, "loss": 1.8904, "step": 716 }, { "epoch": 0.86, "learning_rate": 0.00019645773431504373, "loss": 1.608, "step": 717 }, { "epoch": 0.86, "learning_rate": 0.00019644776646784388, "loss": 2.2307, "step": 718 }, { "epoch": 0.86, "learning_rate": 0.0001964377848692963, "loss": 2.176, "step": 719 }, { "epoch": 0.86, "learning_rate": 0.00019642778952082426, "loss": 2.1984, "step": 720 }, { "epoch": 0.87, "learning_rate": 0.0001964177804238528, "loss": 2.2625, "step": 721 }, { "epoch": 0.87, "learning_rate": 0.00019640775757980903, "loss": 2.3142, "step": 722 }, { "epoch": 0.87, "learning_rate": 0.00019639772099012197, "loss": 2.2366, "step": 723 }, { "epoch": 0.87, "learning_rate": 0.00019638767065622266, "loss": 1.7823, "step": 724 }, { "epoch": 0.87, "learning_rate": 0.000196377606579544, "loss": 2.0677, "step": 725 }, { "epoch": 0.87, "learning_rate": 0.00019636752876152095, "loss": 1.3337, "step": 726 }, { "epoch": 0.87, "learning_rate": 0.00019635743720359037, "loss": 2.055, "step": 727 }, { "epoch": 0.87, "learning_rate": 0.0001963473319071911, "loss": 1.9888, "step": 728 }, { "epoch": 0.88, "learning_rate": 0.00019633721287376393, "loss": 1.9258, "step": 729 }, { "epoch": 0.88, "learning_rate": 0.00019632708010475165, "loss": 2.3768, "step": 730 }, { "epoch": 0.88, "learning_rate": 0.0001963169336015989, "loss": 1.993, "step": 731 }, { "epoch": 0.88, "learning_rate": 0.00019630677336575242, "loss": 2.1989, "step": 732 }, { "epoch": 0.88, "learning_rate": 0.0001962965993986608, "loss": 2.1216, "step": 733 }, { "epoch": 0.88, "learning_rate": 0.00019628641170177464, "loss": 2.2217, "step": 734 }, { "epoch": 0.88, "learning_rate": 0.00019627621027654648, "loss": 1.8809, "step": 735 }, { "epoch": 0.88, "learning_rate": 0.00019626599512443077, "loss": 2.0864, "step": 736 }, { "epoch": 0.88, "learning_rate": 0.00019625576624688406, "loss": 2.0627, "step": 737 }, { "epoch": 0.89, "learning_rate": 0.00019624552364536473, "loss": 2.1347, "step": 738 }, { "epoch": 0.89, "learning_rate": 0.00019623526732133315, "loss": 1.9998, "step": 739 }, { "epoch": 0.89, "learning_rate": 0.00019622499727625162, "loss": 2.1998, "step": 740 }, { "epoch": 0.89, "learning_rate": 0.00019621471351158443, "loss": 1.974, "step": 741 }, { "epoch": 0.89, "learning_rate": 0.00019620441602879787, "loss": 1.9425, "step": 742 }, { "epoch": 0.89, "learning_rate": 0.00019619410482936008, "loss": 2.6227, "step": 743 }, { "epoch": 0.89, "learning_rate": 0.00019618377991474124, "loss": 2.1209, "step": 744 }, { "epoch": 0.89, "learning_rate": 0.00019617344128641345, "loss": 2.0606, "step": 745 }, { "epoch": 0.9, "learning_rate": 0.00019616308894585078, "loss": 2.296, "step": 746 }, { "epoch": 0.9, "learning_rate": 0.00019615272289452923, "loss": 2.0415, "step": 747 }, { "epoch": 0.9, "learning_rate": 0.0001961423431339268, "loss": 1.9516, "step": 748 }, { "epoch": 0.9, "learning_rate": 0.0001961319496655234, "loss": 2.0468, "step": 749 }, { "epoch": 0.9, "learning_rate": 0.0001961215424908009, "loss": 1.877, "step": 750 }, { "epoch": 0.9, "learning_rate": 0.0001961111216112432, "loss": 1.8129, "step": 751 }, { "epoch": 0.9, "learning_rate": 0.00019610068702833596, "loss": 1.9984, "step": 752 }, { "epoch": 0.9, "learning_rate": 0.00019609023874356707, "loss": 1.9013, "step": 753 }, { "epoch": 0.91, "learning_rate": 0.00019607977675842615, "loss": 2.0546, "step": 754 }, { "epoch": 0.91, "learning_rate": 0.00019606930107440485, "loss": 2.2817, "step": 755 }, { "epoch": 0.91, "learning_rate": 0.0001960588116929968, "loss": 2.0578, "step": 756 }, { "epoch": 0.91, "learning_rate": 0.00019604830861569755, "loss": 2.3521, "step": 757 }, { "epoch": 0.91, "learning_rate": 0.00019603779184400457, "loss": 2.0392, "step": 758 }, { "epoch": 0.91, "learning_rate": 0.0001960272613794174, "loss": 1.9863, "step": 759 }, { "epoch": 0.91, "learning_rate": 0.00019601671722343738, "loss": 2.1889, "step": 760 }, { "epoch": 0.91, "learning_rate": 0.0001960061593775679, "loss": 2.0908, "step": 761 }, { "epoch": 0.91, "learning_rate": 0.0001959955878433143, "loss": 1.986, "step": 762 }, { "epoch": 0.92, "learning_rate": 0.00019598500262218386, "loss": 2.0339, "step": 763 }, { "epoch": 0.92, "learning_rate": 0.00019597440371568574, "loss": 2.0958, "step": 764 }, { "epoch": 0.92, "learning_rate": 0.0001959637911253312, "loss": 1.9866, "step": 765 }, { "epoch": 0.92, "learning_rate": 0.00019595316485263327, "loss": 2.2228, "step": 766 }, { "epoch": 0.92, "learning_rate": 0.00019594252489910706, "loss": 1.915, "step": 767 }, { "epoch": 0.92, "learning_rate": 0.00019593187126626965, "loss": 2.0741, "step": 768 }, { "epoch": 0.92, "learning_rate": 0.00019592120395563994, "loss": 2.5346, "step": 769 }, { "epoch": 0.92, "learning_rate": 0.00019591052296873888, "loss": 2.4908, "step": 770 }, { "epoch": 0.93, "learning_rate": 0.00019589982830708937, "loss": 2.1042, "step": 771 }, { "epoch": 0.93, "learning_rate": 0.00019588911997221625, "loss": 1.8676, "step": 772 }, { "epoch": 0.93, "learning_rate": 0.0001958783979656462, "loss": 1.9152, "step": 773 }, { "epoch": 0.93, "learning_rate": 0.00019586766228890806, "loss": 1.7784, "step": 774 }, { "epoch": 0.93, "learning_rate": 0.0001958569129435324, "loss": 2.0784, "step": 775 }, { "epoch": 0.93, "learning_rate": 0.0001958461499310519, "loss": 1.7262, "step": 776 }, { "epoch": 0.93, "learning_rate": 0.00019583537325300118, "loss": 2.4154, "step": 777 }, { "epoch": 0.93, "learning_rate": 0.00019582458291091663, "loss": 2.3185, "step": 778 }, { "epoch": 0.94, "learning_rate": 0.00019581377890633684, "loss": 2.0981, "step": 779 }, { "epoch": 0.94, "learning_rate": 0.00019580296124080212, "loss": 1.8952, "step": 780 }, { "epoch": 0.94, "learning_rate": 0.00019579212991585493, "loss": 1.7208, "step": 781 }, { "epoch": 0.94, "learning_rate": 0.00019578128493303955, "loss": 2.0209, "step": 782 }, { "epoch": 0.94, "learning_rate": 0.00019577042629390217, "loss": 2.1867, "step": 783 }, { "epoch": 0.94, "learning_rate": 0.0001957595539999911, "loss": 2.0805, "step": 784 }, { "epoch": 0.94, "learning_rate": 0.00019574866805285645, "loss": 2.0451, "step": 785 }, { "epoch": 0.94, "learning_rate": 0.00019573776845405028, "loss": 2.2056, "step": 786 }, { "epoch": 0.94, "learning_rate": 0.0001957268552051267, "loss": 2.0773, "step": 787 }, { "epoch": 0.95, "learning_rate": 0.00019571592830764165, "loss": 2.2036, "step": 788 }, { "epoch": 0.95, "learning_rate": 0.00019570498776315309, "loss": 1.7298, "step": 789 }, { "epoch": 0.95, "learning_rate": 0.0001956940335732209, "loss": 1.8931, "step": 790 }, { "epoch": 0.95, "learning_rate": 0.0001956830657394069, "loss": 2.1567, "step": 791 }, { "epoch": 0.95, "learning_rate": 0.00019567208426327488, "loss": 1.9471, "step": 792 }, { "epoch": 0.95, "learning_rate": 0.00019566108914639054, "loss": 1.8916, "step": 793 }, { "epoch": 0.95, "learning_rate": 0.00019565008039032158, "loss": 2.0111, "step": 794 }, { "epoch": 0.95, "learning_rate": 0.00019563905799663752, "loss": 2.1374, "step": 795 }, { "epoch": 0.96, "learning_rate": 0.00019562802196691003, "loss": 2.3083, "step": 796 }, { "epoch": 0.96, "learning_rate": 0.00019561697230271254, "loss": 2.0381, "step": 797 }, { "epoch": 0.96, "learning_rate": 0.0001956059090056205, "loss": 2.1909, "step": 798 }, { "epoch": 0.96, "learning_rate": 0.00019559483207721133, "loss": 1.9893, "step": 799 }, { "epoch": 0.96, "learning_rate": 0.0001955837415190643, "loss": 2.3178, "step": 800 }, { "epoch": 0.96, "learning_rate": 0.0001955726373327607, "loss": 2.0815, "step": 801 }, { "epoch": 0.96, "learning_rate": 0.00019556151951988376, "loss": 1.6012, "step": 802 }, { "epoch": 0.96, "learning_rate": 0.00019555038808201865, "loss": 1.4965, "step": 803 }, { "epoch": 0.97, "learning_rate": 0.00019553924302075242, "loss": 2.3069, "step": 804 }, { "epoch": 0.97, "learning_rate": 0.00019552808433767415, "loss": 2.2388, "step": 805 }, { "epoch": 0.97, "learning_rate": 0.00019551691203437482, "loss": 2.5662, "step": 806 }, { "epoch": 0.97, "learning_rate": 0.00019550572611244738, "loss": 1.9419, "step": 807 }, { "epoch": 0.97, "learning_rate": 0.00019549452657348663, "loss": 2.3638, "step": 808 }, { "epoch": 0.97, "learning_rate": 0.00019548331341908947, "loss": 2.1567, "step": 809 }, { "epoch": 0.97, "learning_rate": 0.00019547208665085457, "loss": 1.9697, "step": 810 }, { "epoch": 0.97, "learning_rate": 0.00019546084627038268, "loss": 1.9006, "step": 811 }, { "epoch": 0.97, "learning_rate": 0.0001954495922792764, "loss": 2.304, "step": 812 }, { "epoch": 0.98, "learning_rate": 0.0001954383246791403, "loss": 2.0494, "step": 813 }, { "epoch": 0.98, "learning_rate": 0.00019542704347158093, "loss": 1.8562, "step": 814 }, { "epoch": 0.98, "learning_rate": 0.00019541574865820672, "loss": 2.1041, "step": 815 }, { "epoch": 0.98, "learning_rate": 0.00019540444024062804, "loss": 2.22, "step": 816 }, { "epoch": 0.98, "learning_rate": 0.00019539311822045727, "loss": 1.9925, "step": 817 }, { "epoch": 0.98, "learning_rate": 0.00019538178259930869, "loss": 2.3213, "step": 818 }, { "epoch": 0.98, "learning_rate": 0.00019537043337879845, "loss": 2.0319, "step": 819 }, { "epoch": 0.98, "learning_rate": 0.00019535907056054475, "loss": 1.8578, "step": 820 }, { "epoch": 0.99, "learning_rate": 0.00019534769414616764, "loss": 1.4115, "step": 821 }, { "epoch": 0.99, "learning_rate": 0.0001953363041372892, "loss": 2.0731, "step": 822 }, { "epoch": 0.99, "learning_rate": 0.00019532490053553335, "loss": 2.0605, "step": 823 }, { "epoch": 0.99, "learning_rate": 0.00019531348334252607, "loss": 1.9044, "step": 824 }, { "epoch": 0.99, "learning_rate": 0.0001953020525598951, "loss": 1.7405, "step": 825 }, { "epoch": 0.99, "learning_rate": 0.0001952906081892703, "loss": 1.898, "step": 826 }, { "epoch": 0.99, "learning_rate": 0.00019527915023228332, "loss": 1.9696, "step": 827 }, { "epoch": 0.99, "learning_rate": 0.00019526767869056788, "loss": 2.0469, "step": 828 }, { "epoch": 1.0, "learning_rate": 0.00019525619356575952, "loss": 2.0307, "step": 829 }, { "epoch": 1.0, "learning_rate": 0.00019524469485949583, "loss": 2.002, "step": 830 }, { "epoch": 1.0, "learning_rate": 0.00019523318257341622, "loss": 1.9438, "step": 831 }, { "epoch": 1.0, "learning_rate": 0.00019522165670916207, "loss": 1.535, "step": 832 }, { "epoch": 1.0, "learning_rate": 0.0001952101172683768, "loss": 1.7505, "step": 833 }, { "epoch": 1.0, "learning_rate": 0.00019519856425270562, "loss": 2.2248, "step": 834 }, { "epoch": 1.0, "learning_rate": 0.00019518699766379576, "loss": 2.0669, "step": 835 }, { "epoch": 1.0, "learning_rate": 0.00019517541750329635, "loss": 2.0268, "step": 836 }, { "epoch": 1.0, "eval_loss": 1.9969017505645752, "eval_runtime": 283.3157, "eval_samples_per_second": 0.727, "eval_steps_per_second": 0.727, "step": 836 }, { "epoch": 1.0, "learning_rate": 0.00019516382377285848, "loss": 1.6712, "step": 837 }, { "epoch": 1.01, "learning_rate": 0.0001951522164741352, "loss": 2.1558, "step": 838 }, { "epoch": 1.01, "learning_rate": 0.00019514059560878138, "loss": 2.1599, "step": 839 }, { "epoch": 1.01, "learning_rate": 0.00019512896117845392, "loss": 1.8762, "step": 840 }, { "epoch": 1.01, "learning_rate": 0.00019511731318481168, "loss": 2.0189, "step": 841 }, { "epoch": 1.01, "learning_rate": 0.00019510565162951537, "loss": 1.9364, "step": 842 }, { "epoch": 1.01, "learning_rate": 0.00019509397651422769, "loss": 1.7319, "step": 843 }, { "epoch": 1.01, "learning_rate": 0.00019508228784061326, "loss": 1.9424, "step": 844 }, { "epoch": 1.01, "learning_rate": 0.0001950705856103386, "loss": 2.277, "step": 845 }, { "epoch": 1.02, "learning_rate": 0.00019505886982507225, "loss": 1.6511, "step": 846 }, { "epoch": 1.0, "learning_rate": 0.0001950471404864846, "loss": 1.9056, "step": 847 }, { "epoch": 1.0, "learning_rate": 0.00019503539759624798, "loss": 1.5105, "step": 848 }, { "epoch": 1.0, "learning_rate": 0.0001950236411560367, "loss": 1.9469, "step": 849 }, { "epoch": 1.0, "learning_rate": 0.00019501187116752693, "loss": 1.5012, "step": 850 }, { "epoch": 1.01, "learning_rate": 0.00019500008763239683, "loss": 1.7086, "step": 851 }, { "epoch": 1.01, "learning_rate": 0.00019498829055232647, "loss": 1.5586, "step": 852 }, { "epoch": 1.01, "learning_rate": 0.00019497647992899788, "loss": 1.5573, "step": 853 }, { "epoch": 1.01, "learning_rate": 0.000194964655764095, "loss": 2.0757, "step": 854 }, { "epoch": 1.01, "learning_rate": 0.00019495281805930367, "loss": 1.5478, "step": 855 }, { "epoch": 1.01, "learning_rate": 0.00019494096681631172, "loss": 1.7068, "step": 856 }, { "epoch": 1.01, "learning_rate": 0.00019492910203680884, "loss": 1.6759, "step": 857 }, { "epoch": 1.01, "learning_rate": 0.0001949172237224867, "loss": 1.4621, "step": 858 }, { "epoch": 1.02, "learning_rate": 0.00019490533187503892, "loss": 1.5359, "step": 859 }, { "epoch": 1.02, "learning_rate": 0.000194893426496161, "loss": 1.9365, "step": 860 }, { "epoch": 1.02, "learning_rate": 0.00019488150758755035, "loss": 1.7089, "step": 861 }, { "epoch": 1.02, "learning_rate": 0.00019486957515090641, "loss": 1.4924, "step": 862 }, { "epoch": 1.02, "learning_rate": 0.00019485762918793046, "loss": 1.387, "step": 863 }, { "epoch": 1.02, "learning_rate": 0.0001948456697003257, "loss": 1.631, "step": 864 }, { "epoch": 1.02, "learning_rate": 0.00019483369668979732, "loss": 1.7953, "step": 865 }, { "epoch": 1.02, "learning_rate": 0.00019482171015805245, "loss": 1.7552, "step": 866 }, { "epoch": 1.03, "learning_rate": 0.00019480971010680002, "loss": 1.8313, "step": 867 }, { "epoch": 1.03, "learning_rate": 0.00019479769653775106, "loss": 1.593, "step": 868 }, { "epoch": 1.03, "learning_rate": 0.00019478566945261837, "loss": 1.9506, "step": 869 }, { "epoch": 1.03, "learning_rate": 0.00019477362885311682, "loss": 1.9598, "step": 870 }, { "epoch": 1.03, "learning_rate": 0.0001947615747409631, "loss": 1.7324, "step": 871 }, { "epoch": 1.03, "learning_rate": 0.00019474950711787585, "loss": 2.1208, "step": 872 }, { "epoch": 1.03, "learning_rate": 0.0001947374259855757, "loss": 1.4111, "step": 873 }, { "epoch": 1.03, "learning_rate": 0.00019472533134578507, "loss": 1.6696, "step": 874 }, { "epoch": 1.03, "learning_rate": 0.00019471322320022849, "loss": 1.6999, "step": 875 }, { "epoch": 1.04, "learning_rate": 0.00019470110155063225, "loss": 2.1287, "step": 876 }, { "epoch": 1.04, "learning_rate": 0.00019468896639872468, "loss": 1.874, "step": 877 }, { "epoch": 1.04, "learning_rate": 0.00019467681774623592, "loss": 1.7149, "step": 878 }, { "epoch": 1.04, "learning_rate": 0.00019466465559489816, "loss": 1.9563, "step": 879 }, { "epoch": 1.04, "learning_rate": 0.00019465247994644545, "loss": 1.3504, "step": 880 }, { "epoch": 1.04, "learning_rate": 0.00019464029080261378, "loss": 1.6176, "step": 881 }, { "epoch": 1.04, "learning_rate": 0.00019462808816514103, "loss": 1.7577, "step": 882 }, { "epoch": 1.04, "learning_rate": 0.00019461587203576706, "loss": 1.8054, "step": 883 }, { "epoch": 1.05, "learning_rate": 0.00019460364241623358, "loss": 2.0246, "step": 884 }, { "epoch": 1.05, "learning_rate": 0.00019459139930828428, "loss": 1.7645, "step": 885 }, { "epoch": 1.05, "learning_rate": 0.0001945791427136648, "loss": 1.9225, "step": 886 }, { "epoch": 1.05, "learning_rate": 0.00019456687263412262, "loss": 1.8967, "step": 887 }, { "epoch": 1.05, "learning_rate": 0.0001945545890714072, "loss": 1.5287, "step": 888 }, { "epoch": 1.05, "learning_rate": 0.0001945422920272699, "loss": 1.5033, "step": 889 }, { "epoch": 1.05, "learning_rate": 0.00019452998150346401, "loss": 2.0148, "step": 890 }, { "epoch": 1.05, "learning_rate": 0.0001945176575017448, "loss": 1.3706, "step": 891 }, { "epoch": 1.06, "learning_rate": 0.0001945053200238693, "loss": 1.7603, "step": 892 }, { "epoch": 1.06, "learning_rate": 0.00019449296907159667, "loss": 1.9884, "step": 893 }, { "epoch": 1.06, "learning_rate": 0.00019448060464668783, "loss": 1.6133, "step": 894 }, { "epoch": 1.06, "learning_rate": 0.00019446822675090565, "loss": 1.7885, "step": 895 }, { "epoch": 1.06, "learning_rate": 0.00019445583538601498, "loss": 1.8573, "step": 896 }, { "epoch": 1.06, "learning_rate": 0.0001944434305537826, "loss": 1.7241, "step": 897 }, { "epoch": 1.06, "learning_rate": 0.0001944310122559771, "loss": 1.8942, "step": 898 }, { "epoch": 1.06, "learning_rate": 0.0001944185804943691, "loss": 1.7541, "step": 899 }, { "epoch": 1.06, "learning_rate": 0.00019440613527073105, "loss": 1.9608, "step": 900 }, { "epoch": 1.07, "learning_rate": 0.00019439367658683745, "loss": 2.0969, "step": 901 }, { "epoch": 1.07, "learning_rate": 0.00019438120444446457, "loss": 2.2589, "step": 902 }, { "epoch": 1.07, "learning_rate": 0.0001943687188453907, "loss": 1.7335, "step": 903 }, { "epoch": 1.07, "learning_rate": 0.00019435621979139596, "loss": 1.8663, "step": 904 }, { "epoch": 1.07, "learning_rate": 0.00019434370728426252, "loss": 1.5627, "step": 905 }, { "epoch": 1.07, "learning_rate": 0.0001943311813257743, "loss": 1.6101, "step": 906 }, { "epoch": 1.07, "learning_rate": 0.00019431864191771732, "loss": 1.9661, "step": 907 }, { "epoch": 1.07, "learning_rate": 0.0001943060890618794, "loss": 1.6487, "step": 908 }, { "epoch": 1.08, "learning_rate": 0.00019429352276005026, "loss": 2.1282, "step": 909 }, { "epoch": 1.08, "learning_rate": 0.00019428094301402162, "loss": 1.6944, "step": 910 }, { "epoch": 1.08, "learning_rate": 0.00019426834982558705, "loss": 1.2433, "step": 911 }, { "epoch": 1.08, "learning_rate": 0.00019425574319654213, "loss": 1.5735, "step": 912 }, { "epoch": 1.08, "learning_rate": 0.00019424312312868417, "loss": 1.6499, "step": 913 }, { "epoch": 1.08, "learning_rate": 0.00019423048962381265, "loss": 1.8366, "step": 914 }, { "epoch": 1.08, "learning_rate": 0.00019421784268372876, "loss": 1.906, "step": 915 }, { "epoch": 1.08, "learning_rate": 0.00019420518231023568, "loss": 1.5976, "step": 916 }, { "epoch": 1.09, "learning_rate": 0.0001941925085051385, "loss": 1.6722, "step": 917 }, { "epoch": 1.09, "learning_rate": 0.00019417982127024422, "loss": 1.8832, "step": 918 }, { "epoch": 1.09, "learning_rate": 0.00019416712060736183, "loss": 1.8865, "step": 919 }, { "epoch": 1.09, "learning_rate": 0.00019415440651830208, "loss": 1.6627, "step": 920 }, { "epoch": 1.09, "learning_rate": 0.0001941416790048778, "loss": 1.3598, "step": 921 }, { "epoch": 1.09, "learning_rate": 0.00019412893806890357, "loss": 2.0506, "step": 922 }, { "epoch": 1.09, "learning_rate": 0.00019411618371219605, "loss": 1.9794, "step": 923 }, { "epoch": 1.09, "learning_rate": 0.0001941034159365737, "loss": 1.7851, "step": 924 }, { "epoch": 1.09, "learning_rate": 0.0001940906347438569, "loss": 1.8312, "step": 925 }, { "epoch": 1.1, "learning_rate": 0.00019407784013586804, "loss": 1.5167, "step": 926 }, { "epoch": 1.1, "learning_rate": 0.00019406503211443128, "loss": 1.5725, "step": 927 }, { "epoch": 1.1, "learning_rate": 0.00019405221068137277, "loss": 1.8857, "step": 928 }, { "epoch": 1.1, "learning_rate": 0.00019403937583852061, "loss": 1.741, "step": 929 }, { "epoch": 1.1, "learning_rate": 0.00019402652758770475, "loss": 1.6748, "step": 930 }, { "epoch": 1.1, "learning_rate": 0.00019401366593075706, "loss": 1.7285, "step": 931 }, { "epoch": 1.1, "learning_rate": 0.00019400079086951135, "loss": 1.7545, "step": 932 }, { "epoch": 1.1, "learning_rate": 0.00019398790240580333, "loss": 1.4491, "step": 933 }, { "epoch": 1.11, "learning_rate": 0.00019397500054147058, "loss": 1.3359, "step": 934 }, { "epoch": 1.11, "learning_rate": 0.00019396208527835263, "loss": 1.9567, "step": 935 }, { "epoch": 1.11, "learning_rate": 0.0001939491566182909, "loss": 2.0011, "step": 936 }, { "epoch": 1.11, "learning_rate": 0.00019393621456312881, "loss": 1.9076, "step": 937 }, { "epoch": 1.11, "learning_rate": 0.00019392325911471155, "loss": 1.5388, "step": 938 }, { "epoch": 1.11, "learning_rate": 0.00019391029027488629, "loss": 1.2337, "step": 939 }, { "epoch": 1.11, "learning_rate": 0.00019389730804550211, "loss": 1.5752, "step": 940 }, { "epoch": 1.11, "learning_rate": 0.00019388431242840998, "loss": 1.9131, "step": 941 }, { "epoch": 1.12, "learning_rate": 0.00019387130342546284, "loss": 1.4177, "step": 942 }, { "epoch": 1.12, "learning_rate": 0.00019385828103851544, "loss": 1.5865, "step": 943 }, { "epoch": 1.12, "learning_rate": 0.0001938452452694245, "loss": 1.6335, "step": 944 }, { "epoch": 1.12, "learning_rate": 0.00019383219612004865, "loss": 1.8599, "step": 945 }, { "epoch": 1.12, "learning_rate": 0.00019381913359224842, "loss": 1.3035, "step": 946 }, { "epoch": 1.12, "learning_rate": 0.00019380605768788621, "loss": 1.7586, "step": 947 }, { "epoch": 1.12, "learning_rate": 0.0001937929684088264, "loss": 1.7334, "step": 948 }, { "epoch": 1.12, "learning_rate": 0.00019377986575693518, "loss": 1.5749, "step": 949 }, { "epoch": 1.12, "learning_rate": 0.00019376674973408075, "loss": 1.874, "step": 950 }, { "epoch": 1.13, "learning_rate": 0.00019375362034213314, "loss": 2.3055, "step": 951 }, { "epoch": 1.13, "learning_rate": 0.00019374047758296433, "loss": 1.5801, "step": 952 }, { "epoch": 1.13, "learning_rate": 0.0001937273214584482, "loss": 1.8788, "step": 953 }, { "epoch": 1.13, "learning_rate": 0.00019371415197046052, "loss": 2.431, "step": 954 }, { "epoch": 1.13, "learning_rate": 0.00019370096912087897, "loss": 1.4963, "step": 955 }, { "epoch": 1.13, "learning_rate": 0.0001936877729115831, "loss": 1.514, "step": 956 }, { "epoch": 1.13, "learning_rate": 0.00019367456334445446, "loss": 1.6099, "step": 957 }, { "epoch": 1.13, "learning_rate": 0.00019366134042137642, "loss": 1.9367, "step": 958 }, { "epoch": 1.14, "learning_rate": 0.00019364810414423427, "loss": 1.7384, "step": 959 }, { "epoch": 1.14, "learning_rate": 0.00019363485451491524, "loss": 1.6166, "step": 960 }, { "epoch": 1.14, "learning_rate": 0.00019362159153530844, "loss": 1.955, "step": 961 }, { "epoch": 1.14, "learning_rate": 0.00019360831520730482, "loss": 1.4189, "step": 962 }, { "epoch": 1.14, "learning_rate": 0.00019359502553279736, "loss": 1.4506, "step": 963 }, { "epoch": 1.14, "learning_rate": 0.00019358172251368087, "loss": 1.7108, "step": 964 }, { "epoch": 1.14, "learning_rate": 0.00019356840615185203, "loss": 1.6641, "step": 965 }, { "epoch": 1.14, "learning_rate": 0.00019355507644920952, "loss": 1.7506, "step": 966 }, { "epoch": 1.15, "learning_rate": 0.00019354173340765382, "loss": 2.0598, "step": 967 }, { "epoch": 1.15, "learning_rate": 0.0001935283770290874, "loss": 1.3494, "step": 968 }, { "epoch": 1.15, "learning_rate": 0.00019351500731541453, "loss": 1.6571, "step": 969 }, { "epoch": 1.15, "learning_rate": 0.0001935016242685415, "loss": 1.6403, "step": 970 }, { "epoch": 1.15, "learning_rate": 0.00019348822789037637, "loss": 1.7555, "step": 971 }, { "epoch": 1.15, "learning_rate": 0.00019347481818282925, "loss": 2.1451, "step": 972 }, { "epoch": 1.15, "learning_rate": 0.000193461395147812, "loss": 1.4522, "step": 973 }, { "epoch": 1.15, "learning_rate": 0.0001934479587872385, "loss": 1.7147, "step": 974 }, { "epoch": 1.15, "learning_rate": 0.0001934345091030245, "loss": 1.3909, "step": 975 }, { "epoch": 1.16, "learning_rate": 0.00019342104609708756, "loss": 1.8104, "step": 976 }, { "epoch": 1.16, "learning_rate": 0.00019340756977134728, "loss": 1.5221, "step": 977 }, { "epoch": 1.16, "learning_rate": 0.000193394080127725, "loss": 1.9447, "step": 978 }, { "epoch": 1.16, "learning_rate": 0.0001933805771681442, "loss": 1.5742, "step": 979 }, { "epoch": 1.16, "learning_rate": 0.00019336706089452996, "loss": 1.5312, "step": 980 }, { "epoch": 1.16, "learning_rate": 0.00019335353130880948, "loss": 1.4304, "step": 981 }, { "epoch": 1.16, "learning_rate": 0.00019333998841291177, "loss": 1.8379, "step": 982 }, { "epoch": 1.16, "learning_rate": 0.00019332643220876773, "loss": 1.877, "step": 983 }, { "epoch": 1.17, "learning_rate": 0.0001933128626983102, "loss": 1.9627, "step": 984 }, { "epoch": 1.17, "learning_rate": 0.0001932992798834739, "loss": 1.7857, "step": 985 }, { "epoch": 1.17, "learning_rate": 0.00019328568376619543, "loss": 1.3189, "step": 986 }, { "epoch": 1.17, "learning_rate": 0.00019327207434841333, "loss": 1.9588, "step": 987 }, { "epoch": 1.17, "learning_rate": 0.00019325845163206795, "loss": 1.3132, "step": 988 }, { "epoch": 1.17, "learning_rate": 0.00019324481561910163, "loss": 1.6304, "step": 989 }, { "epoch": 1.17, "learning_rate": 0.0001932311663114586, "loss": 1.8322, "step": 990 }, { "epoch": 1.17, "learning_rate": 0.00019321750371108486, "loss": 1.4192, "step": 991 }, { "epoch": 1.18, "learning_rate": 0.0001932038278199285, "loss": 1.3915, "step": 992 }, { "epoch": 1.18, "learning_rate": 0.00019319013863993933, "loss": 1.8433, "step": 993 }, { "epoch": 1.18, "learning_rate": 0.0001931764361730692, "loss": 2.1459, "step": 994 }, { "epoch": 1.18, "learning_rate": 0.0001931627204212717, "loss": 1.9799, "step": 995 }, { "epoch": 1.18, "learning_rate": 0.00019314899138650243, "loss": 1.855, "step": 996 }, { "epoch": 1.18, "learning_rate": 0.00019313524907071887, "loss": 1.4763, "step": 997 }, { "epoch": 1.18, "learning_rate": 0.00019312149347588037, "loss": 2.0128, "step": 998 }, { "epoch": 1.18, "learning_rate": 0.00019310772460394814, "loss": 1.6964, "step": 999 }, { "epoch": 1.18, "learning_rate": 0.0001930939424568854, "loss": 1.5864, "step": 1000 }, { "epoch": 1.19, "learning_rate": 0.00019308014703665712, "loss": 1.8437, "step": 1001 }, { "epoch": 1.19, "learning_rate": 0.00019306633834523024, "loss": 2.1677, "step": 1002 }, { "epoch": 1.19, "learning_rate": 0.00019305251638457356, "loss": 1.8872, "step": 1003 }, { "epoch": 1.19, "learning_rate": 0.0001930386811566578, "loss": 1.7312, "step": 1004 }, { "epoch": 1.19, "learning_rate": 0.0001930248326634556, "loss": 1.6772, "step": 1005 }, { "epoch": 1.19, "learning_rate": 0.00019301097090694143, "loss": 1.9666, "step": 1006 }, { "epoch": 1.19, "learning_rate": 0.00019299709588909165, "loss": 1.8946, "step": 1007 }, { "epoch": 1.19, "learning_rate": 0.00019298320761188453, "loss": 2.1784, "step": 1008 }, { "epoch": 1.2, "learning_rate": 0.0001929693060773003, "loss": 2.0249, "step": 1009 }, { "epoch": 1.2, "learning_rate": 0.00019295539128732093, "loss": 1.717, "step": 1010 }, { "epoch": 1.2, "learning_rate": 0.00019294146324393046, "loss": 1.8671, "step": 1011 }, { "epoch": 1.2, "learning_rate": 0.00019292752194911464, "loss": 1.8388, "step": 1012 }, { "epoch": 1.2, "learning_rate": 0.00019291356740486123, "loss": 1.9111, "step": 1013 }, { "epoch": 1.2, "learning_rate": 0.00019289959961315986, "loss": 1.5287, "step": 1014 }, { "epoch": 1.2, "learning_rate": 0.000192885618576002, "loss": 1.5669, "step": 1015 }, { "epoch": 1.2, "learning_rate": 0.00019287162429538105, "loss": 1.9095, "step": 1016 }, { "epoch": 1.21, "learning_rate": 0.00019285761677329232, "loss": 1.9133, "step": 1017 }, { "epoch": 1.21, "learning_rate": 0.00019284359601173294, "loss": 2.1099, "step": 1018 }, { "epoch": 1.21, "learning_rate": 0.000192829562012702, "loss": 1.6303, "step": 1019 }, { "epoch": 1.21, "learning_rate": 0.00019281551477820036, "loss": 1.5907, "step": 1020 }, { "epoch": 1.21, "learning_rate": 0.00019280145431023097, "loss": 1.4897, "step": 1021 }, { "epoch": 1.21, "learning_rate": 0.00019278738061079845, "loss": 1.7414, "step": 1022 }, { "epoch": 1.21, "learning_rate": 0.00019277329368190942, "loss": 1.816, "step": 1023 }, { "epoch": 1.21, "learning_rate": 0.00019275919352557241, "loss": 1.5033, "step": 1024 }, { "epoch": 1.21, "learning_rate": 0.00019274508014379777, "loss": 1.7923, "step": 1025 }, { "epoch": 1.22, "learning_rate": 0.00019273095353859775, "loss": 1.3094, "step": 1026 }, { "epoch": 1.22, "learning_rate": 0.00019271681371198652, "loss": 1.7689, "step": 1027 }, { "epoch": 1.22, "learning_rate": 0.0001927026606659801, "loss": 1.8019, "step": 1028 }, { "epoch": 1.22, "learning_rate": 0.00019268849440259639, "loss": 1.8818, "step": 1029 }, { "epoch": 1.22, "learning_rate": 0.00019267431492385521, "loss": 1.7442, "step": 1030 }, { "epoch": 1.22, "learning_rate": 0.00019266012223177824, "loss": 2.045, "step": 1031 }, { "epoch": 1.22, "learning_rate": 0.00019264591632838903, "loss": 1.7842, "step": 1032 }, { "epoch": 1.22, "learning_rate": 0.00019263169721571308, "loss": 1.5289, "step": 1033 }, { "epoch": 1.23, "learning_rate": 0.00019261746489577765, "loss": 1.6013, "step": 1034 }, { "epoch": 1.23, "learning_rate": 0.00019260321937061202, "loss": 1.7912, "step": 1035 }, { "epoch": 1.23, "learning_rate": 0.0001925889606422473, "loss": 1.7573, "step": 1036 }, { "epoch": 1.23, "learning_rate": 0.0001925746887127164, "loss": 1.7368, "step": 1037 }, { "epoch": 1.23, "learning_rate": 0.00019256040358405424, "loss": 1.7497, "step": 1038 }, { "epoch": 1.23, "learning_rate": 0.00019254610525829758, "loss": 2.0042, "step": 1039 }, { "epoch": 1.23, "learning_rate": 0.00019253179373748504, "loss": 2.0732, "step": 1040 }, { "epoch": 1.23, "learning_rate": 0.00019251746902365708, "loss": 1.8878, "step": 1041 }, { "epoch": 1.24, "learning_rate": 0.00019250313111885618, "loss": 1.9404, "step": 1042 }, { "epoch": 1.24, "learning_rate": 0.00019248878002512654, "loss": 1.5535, "step": 1043 }, { "epoch": 1.24, "learning_rate": 0.00019247441574451432, "loss": 1.9344, "step": 1044 }, { "epoch": 1.24, "learning_rate": 0.0001924600382790676, "loss": 1.9696, "step": 1045 }, { "epoch": 1.24, "eval_loss": 2.064669609069824, "eval_runtime": 283.003, "eval_samples_per_second": 0.728, "eval_steps_per_second": 0.728, "step": 1045 }, { "epoch": 1.24, "learning_rate": 0.00019244564763083624, "loss": 1.4577, "step": 1046 }, { "epoch": 1.24, "learning_rate": 0.00019243124380187204, "loss": 2.1324, "step": 1047 }, { "epoch": 1.24, "learning_rate": 0.00019241682679422873, "loss": 1.4713, "step": 1048 }, { "epoch": 1.24, "learning_rate": 0.00019240239660996177, "loss": 1.7455, "step": 1049 }, { "epoch": 1.24, "learning_rate": 0.0001923879532511287, "loss": 1.5372, "step": 1050 }, { "epoch": 1.25, "learning_rate": 0.00019237349671978872, "loss": 2.0984, "step": 1051 }, { "epoch": 1.25, "learning_rate": 0.0001923590270180031, "loss": 1.5023, "step": 1052 }, { "epoch": 1.25, "learning_rate": 0.0001923445441478348, "loss": 2.0826, "step": 1053 }, { "epoch": 1.25, "learning_rate": 0.00019233004811134886, "loss": 1.7448, "step": 1054 }, { "epoch": 1.25, "learning_rate": 0.00019231553891061208, "loss": 2.0249, "step": 1055 }, { "epoch": 1.25, "learning_rate": 0.00019230101654769312, "loss": 1.6144, "step": 1056 }, { "epoch": 1.25, "learning_rate": 0.0001922864810246626, "loss": 1.9193, "step": 1057 }, { "epoch": 1.25, "learning_rate": 0.00019227193234359292, "loss": 2.0057, "step": 1058 }, { "epoch": 1.26, "learning_rate": 0.00019225737050655842, "loss": 1.9493, "step": 1059 }, { "epoch": 1.26, "learning_rate": 0.00019224279551563532, "loss": 1.9545, "step": 1060 }, { "epoch": 1.26, "learning_rate": 0.0001922282073729017, "loss": 1.8983, "step": 1061 }, { "epoch": 1.26, "learning_rate": 0.00019221360608043746, "loss": 1.9414, "step": 1062 }, { "epoch": 1.26, "learning_rate": 0.00019219899164032447, "loss": 1.8471, "step": 1063 }, { "epoch": 1.26, "learning_rate": 0.0001921843640546464, "loss": 1.7568, "step": 1064 }, { "epoch": 1.26, "learning_rate": 0.00019216972332548887, "loss": 2.0737, "step": 1065 }, { "epoch": 1.26, "learning_rate": 0.0001921550694549393, "loss": 1.6109, "step": 1066 }, { "epoch": 1.27, "learning_rate": 0.000192140402445087, "loss": 1.6684, "step": 1067 }, { "epoch": 1.27, "learning_rate": 0.0001921257222980232, "loss": 1.5101, "step": 1068 }, { "epoch": 1.27, "learning_rate": 0.00019211102901584094, "loss": 1.5262, "step": 1069 }, { "epoch": 1.27, "learning_rate": 0.0001920963226006352, "loss": 1.9757, "step": 1070 }, { "epoch": 1.27, "learning_rate": 0.00019208160305450272, "loss": 2.038, "step": 1071 }, { "epoch": 1.27, "learning_rate": 0.00019206687037954224, "loss": 1.4755, "step": 1072 }, { "epoch": 1.27, "learning_rate": 0.00019205212457785434, "loss": 1.7406, "step": 1073 }, { "epoch": 1.27, "learning_rate": 0.00019203736565154137, "loss": 1.9564, "step": 1074 }, { "epoch": 1.27, "learning_rate": 0.0001920225936027077, "loss": 1.823, "step": 1075 }, { "epoch": 1.28, "learning_rate": 0.0001920078084334595, "loss": 1.8275, "step": 1076 }, { "epoch": 1.28, "learning_rate": 0.0001919930101459048, "loss": 1.7106, "step": 1077 }, { "epoch": 1.28, "learning_rate": 0.00019197819874215347, "loss": 1.5958, "step": 1078 }, { "epoch": 1.28, "learning_rate": 0.00019196337422431735, "loss": 2.1478, "step": 1079 }, { "epoch": 1.28, "learning_rate": 0.0001919485365945101, "loss": 1.7238, "step": 1080 }, { "epoch": 1.28, "learning_rate": 0.00019193368585484718, "loss": 2.0758, "step": 1081 }, { "epoch": 1.28, "learning_rate": 0.000191918822007446, "loss": 1.8403, "step": 1082 }, { "epoch": 1.28, "learning_rate": 0.00019190394505442585, "loss": 1.8286, "step": 1083 }, { "epoch": 1.29, "learning_rate": 0.00019188905499790789, "loss": 1.6992, "step": 1084 }, { "epoch": 1.29, "learning_rate": 0.00019187415184001503, "loss": 1.8512, "step": 1085 }, { "epoch": 1.29, "learning_rate": 0.0001918592355828722, "loss": 1.8236, "step": 1086 }, { "epoch": 1.29, "learning_rate": 0.0001918443062286061, "loss": 1.6173, "step": 1087 }, { "epoch": 1.29, "learning_rate": 0.00019182936377934535, "loss": 1.8593, "step": 1088 }, { "epoch": 1.29, "learning_rate": 0.0001918144082372204, "loss": 1.8184, "step": 1089 }, { "epoch": 1.29, "learning_rate": 0.00019179943960436358, "loss": 1.9655, "step": 1090 }, { "epoch": 1.29, "learning_rate": 0.00019178445788290915, "loss": 1.5858, "step": 1091 }, { "epoch": 1.3, "learning_rate": 0.00019176946307499312, "loss": 1.8359, "step": 1092 }, { "epoch": 1.3, "learning_rate": 0.0001917544551827534, "loss": 1.4354, "step": 1093 }, { "epoch": 1.3, "learning_rate": 0.00019173943420832984, "loss": 1.4312, "step": 1094 }, { "epoch": 1.3, "learning_rate": 0.0001917244001538641, "loss": 2.0024, "step": 1095 }, { "epoch": 1.3, "learning_rate": 0.00019170935302149965, "loss": 1.5994, "step": 1096 }, { "epoch": 1.3, "learning_rate": 0.00019169429281338195, "loss": 2.05, "step": 1097 }, { "epoch": 1.3, "learning_rate": 0.00019167921953165825, "loss": 1.8746, "step": 1098 }, { "epoch": 1.3, "learning_rate": 0.00019166413317847763, "loss": 2.0071, "step": 1099 }, { "epoch": 1.3, "learning_rate": 0.00019164903375599112, "loss": 2.0331, "step": 1100 }, { "epoch": 1.31, "learning_rate": 0.00019163392126635154, "loss": 1.3587, "step": 1101 }, { "epoch": 1.31, "learning_rate": 0.00019161879571171362, "loss": 1.6144, "step": 1102 }, { "epoch": 1.31, "learning_rate": 0.00019160365709423388, "loss": 1.4845, "step": 1103 }, { "epoch": 1.31, "learning_rate": 0.00019158850541607083, "loss": 1.4511, "step": 1104 }, { "epoch": 1.31, "learning_rate": 0.00019157334067938474, "loss": 1.8015, "step": 1105 }, { "epoch": 1.31, "learning_rate": 0.00019155816288633776, "loss": 1.5029, "step": 1106 }, { "epoch": 1.31, "learning_rate": 0.00019154297203909394, "loss": 1.7102, "step": 1107 }, { "epoch": 1.31, "learning_rate": 0.00019152776813981912, "loss": 1.6661, "step": 1108 }, { "epoch": 1.32, "learning_rate": 0.0001915125511906811, "loss": 1.5872, "step": 1109 }, { "epoch": 1.32, "learning_rate": 0.00019149732119384943, "loss": 1.7868, "step": 1110 }, { "epoch": 1.32, "learning_rate": 0.0001914820781514956, "loss": 1.6365, "step": 1111 }, { "epoch": 1.32, "learning_rate": 0.0001914668220657929, "loss": 2.3434, "step": 1112 }, { "epoch": 1.32, "learning_rate": 0.0001914515529389166, "loss": 1.6458, "step": 1113 }, { "epoch": 1.32, "learning_rate": 0.0001914362707730437, "loss": 1.7061, "step": 1114 }, { "epoch": 1.32, "learning_rate": 0.00019142097557035308, "loss": 1.8606, "step": 1115 }, { "epoch": 1.32, "learning_rate": 0.00019140566733302552, "loss": 1.9415, "step": 1116 }, { "epoch": 1.33, "learning_rate": 0.00019139034606324362, "loss": 1.7411, "step": 1117 }, { "epoch": 1.33, "learning_rate": 0.00019137501176319193, "loss": 1.9404, "step": 1118 }, { "epoch": 1.33, "learning_rate": 0.0001913596644350567, "loss": 1.802, "step": 1119 }, { "epoch": 1.33, "learning_rate": 0.00019134430408102615, "loss": 1.2244, "step": 1120 }, { "epoch": 1.33, "learning_rate": 0.00019132893070329036, "loss": 1.902, "step": 1121 }, { "epoch": 1.33, "learning_rate": 0.0001913135443040412, "loss": 1.4578, "step": 1122 }, { "epoch": 1.33, "learning_rate": 0.00019129814488547247, "loss": 1.6816, "step": 1123 }, { "epoch": 1.33, "learning_rate": 0.0001912827324497798, "loss": 1.7293, "step": 1124 }, { "epoch": 1.33, "learning_rate": 0.00019126730699916061, "loss": 1.6344, "step": 1125 }, { "epoch": 1.34, "learning_rate": 0.0001912518685358143, "loss": 1.6819, "step": 1126 }, { "epoch": 1.34, "learning_rate": 0.00019123641706194199, "loss": 1.6761, "step": 1127 }, { "epoch": 1.34, "learning_rate": 0.00019122095257974677, "loss": 1.9222, "step": 1128 }, { "epoch": 1.34, "learning_rate": 0.00019120547509143354, "loss": 1.6117, "step": 1129 }, { "epoch": 1.34, "learning_rate": 0.00019118998459920902, "loss": 1.688, "step": 1130 }, { "epoch": 1.34, "learning_rate": 0.00019117448110528184, "loss": 1.8383, "step": 1131 }, { "epoch": 1.34, "learning_rate": 0.00019115896461186245, "loss": 1.5225, "step": 1132 }, { "epoch": 1.34, "learning_rate": 0.00019114343512116318, "loss": 2.0376, "step": 1133 }, { "epoch": 1.35, "learning_rate": 0.00019112789263539813, "loss": 1.5632, "step": 1134 }, { "epoch": 1.35, "learning_rate": 0.00019111233715678343, "loss": 1.7049, "step": 1135 }, { "epoch": 1.35, "learning_rate": 0.0001910967686875369, "loss": 1.4992, "step": 1136 }, { "epoch": 1.35, "learning_rate": 0.00019108118722987826, "loss": 1.7949, "step": 1137 }, { "epoch": 1.35, "learning_rate": 0.00019106559278602903, "loss": 1.4688, "step": 1138 }, { "epoch": 1.35, "learning_rate": 0.00019104998535821274, "loss": 1.4031, "step": 1139 }, { "epoch": 1.35, "learning_rate": 0.0001910343649486546, "loss": 2.1757, "step": 1140 }, { "epoch": 1.35, "learning_rate": 0.00019101873155958179, "loss": 1.622, "step": 1141 }, { "epoch": 1.36, "learning_rate": 0.00019100308519322322, "loss": 1.9441, "step": 1142 }, { "epoch": 1.36, "learning_rate": 0.0001909874258518098, "loss": 1.8065, "step": 1143 }, { "epoch": 1.36, "learning_rate": 0.00019097175353757417, "loss": 1.8348, "step": 1144 }, { "epoch": 1.36, "learning_rate": 0.00019095606825275083, "loss": 2.0519, "step": 1145 }, { "epoch": 1.36, "learning_rate": 0.00019094036999957624, "loss": 1.9172, "step": 1146 }, { "epoch": 1.36, "learning_rate": 0.00019092465878028854, "loss": 1.9961, "step": 1147 }, { "epoch": 1.36, "learning_rate": 0.00019090893459712787, "loss": 2.1239, "step": 1148 }, { "epoch": 1.36, "learning_rate": 0.00019089319745233611, "loss": 1.3481, "step": 1149 }, { "epoch": 1.36, "learning_rate": 0.00019087744734815708, "loss": 1.5035, "step": 1150 }, { "epoch": 1.37, "learning_rate": 0.00019086168428683638, "loss": 1.818, "step": 1151 }, { "epoch": 1.37, "learning_rate": 0.00019084590827062145, "loss": 2.0481, "step": 1152 }, { "epoch": 1.37, "learning_rate": 0.00019083011930176165, "loss": 1.4444, "step": 1153 }, { "epoch": 1.37, "learning_rate": 0.00019081431738250814, "loss": 1.6059, "step": 1154 }, { "epoch": 1.37, "learning_rate": 0.0001907985025151139, "loss": 2.0284, "step": 1155 }, { "epoch": 1.37, "learning_rate": 0.0001907826747018338, "loss": 1.8603, "step": 1156 }, { "epoch": 1.37, "learning_rate": 0.00019076683394492455, "loss": 1.7189, "step": 1157 }, { "epoch": 1.37, "learning_rate": 0.00019075098024664468, "loss": 1.7497, "step": 1158 }, { "epoch": 1.38, "learning_rate": 0.00019073511360925458, "loss": 1.7489, "step": 1159 }, { "epoch": 1.38, "learning_rate": 0.0001907192340350165, "loss": 1.6059, "step": 1160 }, { "epoch": 1.38, "learning_rate": 0.00019070334152619453, "loss": 1.4407, "step": 1161 }, { "epoch": 1.38, "learning_rate": 0.00019068743608505455, "loss": 1.7025, "step": 1162 }, { "epoch": 1.38, "learning_rate": 0.00019067151771386438, "loss": 1.7921, "step": 1163 }, { "epoch": 1.38, "learning_rate": 0.0001906555864148936, "loss": 1.6147, "step": 1164 }, { "epoch": 1.38, "learning_rate": 0.0001906396421904137, "loss": 1.6192, "step": 1165 }, { "epoch": 1.38, "learning_rate": 0.00019062368504269795, "loss": 1.4341, "step": 1166 }, { "epoch": 1.39, "learning_rate": 0.00019060771497402147, "loss": 1.3054, "step": 1167 }, { "epoch": 1.39, "learning_rate": 0.0001905917319866613, "loss": 2.041, "step": 1168 }, { "epoch": 1.39, "learning_rate": 0.00019057573608289623, "loss": 2.004, "step": 1169 }, { "epoch": 1.39, "learning_rate": 0.00019055972726500695, "loss": 1.4002, "step": 1170 }, { "epoch": 1.39, "learning_rate": 0.00019054370553527595, "loss": 1.5554, "step": 1171 }, { "epoch": 1.39, "learning_rate": 0.00019052767089598754, "loss": 1.9783, "step": 1172 }, { "epoch": 1.39, "learning_rate": 0.000190511623349428, "loss": 1.7443, "step": 1173 }, { "epoch": 1.39, "learning_rate": 0.00019049556289788528, "loss": 1.6089, "step": 1174 }, { "epoch": 1.39, "learning_rate": 0.0001904794895436493, "loss": 1.8784, "step": 1175 }, { "epoch": 1.4, "learning_rate": 0.0001904634032890117, "loss": 2.0985, "step": 1176 }, { "epoch": 1.4, "learning_rate": 0.0001904473041362661, "loss": 1.811, "step": 1177 }, { "epoch": 1.4, "learning_rate": 0.00019043119208770793, "loss": 1.407, "step": 1178 }, { "epoch": 1.4, "learning_rate": 0.0001904150671456343, "loss": 1.7269, "step": 1179 }, { "epoch": 1.4, "learning_rate": 0.00019039892931234435, "loss": 1.8374, "step": 1180 }, { "epoch": 1.4, "learning_rate": 0.00019038277859013896, "loss": 1.583, "step": 1181 }, { "epoch": 1.4, "learning_rate": 0.00019036661498132086, "loss": 1.6407, "step": 1182 }, { "epoch": 1.4, "learning_rate": 0.00019035043848819464, "loss": 2.0828, "step": 1183 }, { "epoch": 1.41, "learning_rate": 0.00019033424911306672, "loss": 1.7067, "step": 1184 }, { "epoch": 1.41, "learning_rate": 0.00019031804685824534, "loss": 1.55, "step": 1185 }, { "epoch": 1.41, "learning_rate": 0.0001903018317260406, "loss": 1.7573, "step": 1186 }, { "epoch": 1.41, "learning_rate": 0.00019028560371876446, "loss": 1.5666, "step": 1187 }, { "epoch": 1.41, "learning_rate": 0.0001902693628387306, "loss": 1.5192, "step": 1188 }, { "epoch": 1.41, "learning_rate": 0.00019025310908825466, "loss": 2.0093, "step": 1189 }, { "epoch": 1.41, "learning_rate": 0.00019023684246965406, "loss": 1.8414, "step": 1190 }, { "epoch": 1.41, "learning_rate": 0.00019022056298524808, "loss": 1.3696, "step": 1191 }, { "epoch": 1.42, "learning_rate": 0.00019020427063735782, "loss": 1.6336, "step": 1192 }, { "epoch": 1.42, "learning_rate": 0.00019018796542830617, "loss": 1.8528, "step": 1193 }, { "epoch": 1.42, "learning_rate": 0.00019017164736041795, "loss": 2.0523, "step": 1194 }, { "epoch": 1.42, "learning_rate": 0.00019015531643601973, "loss": 1.7526, "step": 1195 }, { "epoch": 1.42, "learning_rate": 0.00019013897265743998, "loss": 1.8391, "step": 1196 }, { "epoch": 1.42, "learning_rate": 0.00019012261602700892, "loss": 1.4257, "step": 1197 }, { "epoch": 1.42, "learning_rate": 0.00019010624654705867, "loss": 2.0911, "step": 1198 }, { "epoch": 1.42, "learning_rate": 0.0001900898642199232, "loss": 1.7578, "step": 1199 }, { "epoch": 1.42, "learning_rate": 0.00019007346904793818, "loss": 1.9003, "step": 1200 }, { "epoch": 1.43, "learning_rate": 0.0001900570610334413, "loss": 1.3918, "step": 1201 }, { "epoch": 1.43, "learning_rate": 0.0001900406401787719, "loss": 2.0365, "step": 1202 }, { "epoch": 1.43, "learning_rate": 0.00019002420648627131, "loss": 1.5184, "step": 1203 }, { "epoch": 1.43, "learning_rate": 0.00019000775995828254, "loss": 1.6412, "step": 1204 }, { "epoch": 1.43, "learning_rate": 0.00018999130059715058, "loss": 1.5031, "step": 1205 }, { "epoch": 1.43, "learning_rate": 0.00018997482840522217, "loss": 1.4421, "step": 1206 }, { "epoch": 1.43, "learning_rate": 0.00018995834338484584, "loss": 1.9431, "step": 1207 }, { "epoch": 1.43, "learning_rate": 0.000189941845538372, "loss": 1.8141, "step": 1208 }, { "epoch": 1.44, "learning_rate": 0.0001899253348681529, "loss": 1.7289, "step": 1209 }, { "epoch": 1.44, "learning_rate": 0.00018990881137654258, "loss": 1.7217, "step": 1210 }, { "epoch": 1.44, "learning_rate": 0.000189892275065897, "loss": 2.3727, "step": 1211 }, { "epoch": 1.44, "learning_rate": 0.00018987572593857381, "loss": 1.4833, "step": 1212 }, { "epoch": 1.44, "learning_rate": 0.00018985916399693256, "loss": 2.13, "step": 1213 }, { "epoch": 1.44, "learning_rate": 0.00018984258924333464, "loss": 1.875, "step": 1214 }, { "epoch": 1.44, "learning_rate": 0.00018982600168014323, "loss": 1.783, "step": 1215 }, { "epoch": 1.44, "learning_rate": 0.00018980940130972337, "loss": 1.6815, "step": 1216 }, { "epoch": 1.45, "learning_rate": 0.0001897927881344419, "loss": 2.049, "step": 1217 }, { "epoch": 1.45, "learning_rate": 0.00018977616215666752, "loss": 1.918, "step": 1218 }, { "epoch": 1.45, "learning_rate": 0.0001897595233787707, "loss": 1.5824, "step": 1219 }, { "epoch": 1.45, "learning_rate": 0.00018974287180312377, "loss": 1.7473, "step": 1220 }, { "epoch": 1.45, "learning_rate": 0.00018972620743210093, "loss": 1.6915, "step": 1221 }, { "epoch": 1.45, "learning_rate": 0.0001897095302680781, "loss": 1.7633, "step": 1222 }, { "epoch": 1.45, "learning_rate": 0.00018969284031343308, "loss": 1.6921, "step": 1223 }, { "epoch": 1.45, "learning_rate": 0.00018967613757054554, "loss": 1.5433, "step": 1224 }, { "epoch": 1.45, "learning_rate": 0.00018965942204179686, "loss": 1.9389, "step": 1225 }, { "epoch": 1.46, "learning_rate": 0.00018964269372957038, "loss": 1.5625, "step": 1226 }, { "epoch": 1.46, "learning_rate": 0.00018962595263625115, "loss": 1.4835, "step": 1227 }, { "epoch": 1.46, "learning_rate": 0.00018960919876422611, "loss": 1.8479, "step": 1228 }, { "epoch": 1.46, "learning_rate": 0.00018959243211588397, "loss": 1.7861, "step": 1229 }, { "epoch": 1.46, "learning_rate": 0.00018957565269361531, "loss": 1.867, "step": 1230 }, { "epoch": 1.46, "learning_rate": 0.00018955886049981245, "loss": 1.9383, "step": 1231 }, { "epoch": 1.46, "learning_rate": 0.0001895420555368697, "loss": 1.755, "step": 1232 }, { "epoch": 1.46, "learning_rate": 0.000189525237807183, "loss": 1.5166, "step": 1233 }, { "epoch": 1.47, "learning_rate": 0.00018950840731315024, "loss": 1.8629, "step": 1234 }, { "epoch": 1.47, "learning_rate": 0.000189491564057171, "loss": 1.6845, "step": 1235 }, { "epoch": 1.47, "learning_rate": 0.00018947470804164685, "loss": 1.4748, "step": 1236 }, { "epoch": 1.47, "learning_rate": 0.00018945783926898105, "loss": 1.8907, "step": 1237 }, { "epoch": 1.47, "learning_rate": 0.00018944095774157873, "loss": 1.5758, "step": 1238 }, { "epoch": 1.47, "learning_rate": 0.00018942406346184683, "loss": 1.6367, "step": 1239 }, { "epoch": 1.47, "learning_rate": 0.00018940715643219407, "loss": 1.7285, "step": 1240 }, { "epoch": 1.47, "learning_rate": 0.00018939023665503108, "loss": 1.5714, "step": 1241 }, { "epoch": 1.48, "learning_rate": 0.0001893733041327702, "loss": 1.9308, "step": 1242 }, { "epoch": 1.48, "learning_rate": 0.00018935635886782568, "loss": 1.9153, "step": 1243 }, { "epoch": 1.48, "learning_rate": 0.00018933940086261351, "loss": 1.8009, "step": 1244 }, { "epoch": 1.48, "learning_rate": 0.00018932243011955154, "loss": 1.7392, "step": 1245 }, { "epoch": 1.48, "learning_rate": 0.00018930544664105944, "loss": 1.821, "step": 1246 }, { "epoch": 1.48, "learning_rate": 0.0001892884504295587, "loss": 1.475, "step": 1247 }, { "epoch": 1.48, "learning_rate": 0.00018927144148747255, "loss": 1.8937, "step": 1248 }, { "epoch": 1.48, "learning_rate": 0.00018925441981722618, "loss": 1.6958, "step": 1249 }, { "epoch": 1.48, "learning_rate": 0.00018923738542124644, "loss": 1.6836, "step": 1250 }, { "epoch": 1.49, "learning_rate": 0.00018922033830196208, "loss": 2.0213, "step": 1251 }, { "epoch": 1.49, "learning_rate": 0.00018920327846180365, "loss": 1.9572, "step": 1252 }, { "epoch": 1.49, "learning_rate": 0.00018918620590320352, "loss": 1.9449, "step": 1253 }, { "epoch": 1.49, "learning_rate": 0.00018916912062859583, "loss": 1.7297, "step": 1254 }, { "epoch": 1.49, "eval_loss": 2.0551259517669678, "eval_runtime": 283.8338, "eval_samples_per_second": 0.726, "eval_steps_per_second": 0.726, "step": 1254 }, { "epoch": 1.49, "learning_rate": 0.00018915202264041664, "loss": 1.8158, "step": 1255 }, { "epoch": 1.49, "learning_rate": 0.0001891349119411037, "loss": 1.921, "step": 1256 }, { "epoch": 1.49, "learning_rate": 0.00018911778853309658, "loss": 1.5726, "step": 1257 }, { "epoch": 1.49, "learning_rate": 0.0001891006524188368, "loss": 1.6641, "step": 1258 }, { "epoch": 1.5, "learning_rate": 0.00018908350360076752, "loss": 1.5841, "step": 1259 }, { "epoch": 1.5, "learning_rate": 0.00018906634208133385, "loss": 1.8567, "step": 1260 }, { "epoch": 1.5, "learning_rate": 0.00018904916786298257, "loss": 1.5584, "step": 1261 }, { "epoch": 1.5, "learning_rate": 0.00018903198094816242, "loss": 1.6615, "step": 1262 }, { "epoch": 1.5, "learning_rate": 0.00018901478133932385, "loss": 1.7477, "step": 1263 }, { "epoch": 1.5, "learning_rate": 0.00018899756903891914, "loss": 1.3796, "step": 1264 }, { "epoch": 1.5, "learning_rate": 0.00018898034404940238, "loss": 1.7991, "step": 1265 }, { "epoch": 1.5, "learning_rate": 0.00018896310637322953, "loss": 1.4944, "step": 1266 }, { "epoch": 1.51, "learning_rate": 0.00018894585601285827, "loss": 1.5719, "step": 1267 }, { "epoch": 1.51, "learning_rate": 0.00018892859297074812, "loss": 1.5495, "step": 1268 }, { "epoch": 1.51, "learning_rate": 0.00018891131724936043, "loss": 1.7611, "step": 1269 }, { "epoch": 1.51, "learning_rate": 0.00018889402885115833, "loss": 1.5991, "step": 1270 }, { "epoch": 1.51, "learning_rate": 0.00018887672777860676, "loss": 1.8849, "step": 1271 }, { "epoch": 1.51, "learning_rate": 0.0001888594140341725, "loss": 1.6136, "step": 1272 }, { "epoch": 1.51, "learning_rate": 0.0001888420876203241, "loss": 1.8288, "step": 1273 }, { "epoch": 1.51, "learning_rate": 0.0001888247485395319, "loss": 1.6625, "step": 1274 }, { "epoch": 1.52, "learning_rate": 0.00018880739679426816, "loss": 1.49, "step": 1275 }, { "epoch": 1.52, "learning_rate": 0.00018879003238700675, "loss": 1.874, "step": 1276 }, { "epoch": 1.52, "learning_rate": 0.00018877265532022352, "loss": 1.751, "step": 1277 }, { "epoch": 1.52, "learning_rate": 0.00018875526559639604, "loss": 1.9882, "step": 1278 }, { "epoch": 1.52, "learning_rate": 0.00018873786321800374, "loss": 1.5214, "step": 1279 }, { "epoch": 1.52, "learning_rate": 0.0001887204481875278, "loss": 1.741, "step": 1280 }, { "epoch": 1.52, "learning_rate": 0.00018870302050745118, "loss": 1.7798, "step": 1281 }, { "epoch": 1.52, "learning_rate": 0.00018868558018025878, "loss": 1.9258, "step": 1282 }, { "epoch": 1.52, "learning_rate": 0.0001886681272084371, "loss": 1.9096, "step": 1283 }, { "epoch": 1.53, "learning_rate": 0.00018865066159447466, "loss": 1.6729, "step": 1284 }, { "epoch": 1.53, "learning_rate": 0.00018863318334086157, "loss": 1.6239, "step": 1285 }, { "epoch": 1.53, "learning_rate": 0.00018861569245008994, "loss": 1.9857, "step": 1286 }, { "epoch": 1.53, "learning_rate": 0.00018859818892465354, "loss": 1.9905, "step": 1287 }, { "epoch": 1.53, "learning_rate": 0.000188580672767048, "loss": 2.0073, "step": 1288 }, { "epoch": 1.53, "learning_rate": 0.00018856314397977075, "loss": 1.7109, "step": 1289 }, { "epoch": 1.53, "learning_rate": 0.000188545602565321, "loss": 1.3727, "step": 1290 }, { "epoch": 1.53, "learning_rate": 0.00018852804852619975, "loss": 1.7045, "step": 1291 }, { "epoch": 1.54, "learning_rate": 0.00018851048186490992, "loss": 1.9042, "step": 1292 }, { "epoch": 1.54, "learning_rate": 0.00018849290258395602, "loss": 1.7174, "step": 1293 }, { "epoch": 1.54, "learning_rate": 0.00018847531068584452, "loss": 1.6502, "step": 1294 }, { "epoch": 1.54, "learning_rate": 0.00018845770617308366, "loss": 1.8582, "step": 1295 }, { "epoch": 1.54, "learning_rate": 0.0001884400890481834, "loss": 1.4846, "step": 1296 }, { "epoch": 1.54, "learning_rate": 0.00018842245931365562, "loss": 1.5428, "step": 1297 }, { "epoch": 1.54, "learning_rate": 0.00018840481697201392, "loss": 1.7266, "step": 1298 }, { "epoch": 1.54, "learning_rate": 0.0001883871620257737, "loss": 1.9324, "step": 1299 }, { "epoch": 1.55, "learning_rate": 0.00018836949447745215, "loss": 1.577, "step": 1300 }, { "epoch": 1.55, "learning_rate": 0.0001883518143295683, "loss": 1.6388, "step": 1301 }, { "epoch": 1.55, "learning_rate": 0.00018833412158464298, "loss": 1.9201, "step": 1302 }, { "epoch": 1.55, "learning_rate": 0.00018831641624519877, "loss": 1.6478, "step": 1303 }, { "epoch": 1.55, "learning_rate": 0.00018829869831376005, "loss": 1.6826, "step": 1304 }, { "epoch": 1.55, "learning_rate": 0.00018828096779285303, "loss": 1.8513, "step": 1305 }, { "epoch": 1.55, "learning_rate": 0.00018826322468500566, "loss": 1.571, "step": 1306 }, { "epoch": 1.55, "learning_rate": 0.00018824546899274777, "loss": 1.1602, "step": 1307 }, { "epoch": 1.55, "learning_rate": 0.0001882277007186109, "loss": 1.9998, "step": 1308 }, { "epoch": 1.56, "learning_rate": 0.0001882099198651284, "loss": 1.7034, "step": 1309 }, { "epoch": 1.56, "learning_rate": 0.0001881921264348355, "loss": 1.4031, "step": 1310 }, { "epoch": 1.56, "learning_rate": 0.00018817432043026911, "loss": 1.8413, "step": 1311 }, { "epoch": 1.56, "learning_rate": 0.00018815650185396797, "loss": 1.6606, "step": 1312 }, { "epoch": 1.56, "learning_rate": 0.00018813867070847264, "loss": 1.5792, "step": 1313 }, { "epoch": 1.56, "learning_rate": 0.00018812082699632546, "loss": 1.4525, "step": 1314 }, { "epoch": 1.56, "learning_rate": 0.00018810297072007054, "loss": 1.4906, "step": 1315 }, { "epoch": 1.56, "learning_rate": 0.00018808510188225377, "loss": 1.6284, "step": 1316 }, { "epoch": 1.57, "learning_rate": 0.0001880672204854229, "loss": 1.7281, "step": 1317 }, { "epoch": 1.57, "learning_rate": 0.0001880493265321274, "loss": 1.5345, "step": 1318 }, { "epoch": 1.57, "learning_rate": 0.00018803142002491856, "loss": 2.0933, "step": 1319 }, { "epoch": 1.57, "learning_rate": 0.00018801350096634946, "loss": 1.9372, "step": 1320 }, { "epoch": 1.57, "learning_rate": 0.000187995569358975, "loss": 1.7151, "step": 1321 }, { "epoch": 1.57, "learning_rate": 0.00018797762520535177, "loss": 1.4823, "step": 1322 }, { "epoch": 1.57, "learning_rate": 0.0001879596685080383, "loss": 2.0495, "step": 1323 }, { "epoch": 1.57, "learning_rate": 0.00018794169926959474, "loss": 2.2966, "step": 1324 }, { "epoch": 1.58, "learning_rate": 0.00018792371749258314, "loss": 1.7868, "step": 1325 }, { "epoch": 1.58, "learning_rate": 0.00018790572317956735, "loss": 1.9403, "step": 1326 }, { "epoch": 1.58, "learning_rate": 0.00018788771633311292, "loss": 1.6687, "step": 1327 }, { "epoch": 1.58, "learning_rate": 0.00018786969695578723, "loss": 1.8422, "step": 1328 }, { "epoch": 1.58, "learning_rate": 0.00018785166505015948, "loss": 1.5916, "step": 1329 }, { "epoch": 1.58, "learning_rate": 0.00018783362061880062, "loss": 1.9119, "step": 1330 }, { "epoch": 1.58, "learning_rate": 0.00018781556366428336, "loss": 1.4903, "step": 1331 }, { "epoch": 1.58, "learning_rate": 0.00018779749418918227, "loss": 1.9497, "step": 1332 }, { "epoch": 1.58, "learning_rate": 0.00018777941219607364, "loss": 1.9462, "step": 1333 }, { "epoch": 1.59, "learning_rate": 0.00018776131768753556, "loss": 2.0474, "step": 1334 }, { "epoch": 1.59, "learning_rate": 0.00018774321066614795, "loss": 1.4474, "step": 1335 }, { "epoch": 1.59, "learning_rate": 0.00018772509113449245, "loss": 1.8315, "step": 1336 }, { "epoch": 1.59, "learning_rate": 0.00018770695909515247, "loss": 1.7684, "step": 1337 }, { "epoch": 1.59, "learning_rate": 0.00018768881455071332, "loss": 1.2675, "step": 1338 }, { "epoch": 1.59, "learning_rate": 0.000187670657503762, "loss": 1.8226, "step": 1339 }, { "epoch": 1.59, "learning_rate": 0.00018765248795688726, "loss": 2.2112, "step": 1340 }, { "epoch": 1.59, "learning_rate": 0.0001876343059126797, "loss": 1.3627, "step": 1341 }, { "epoch": 1.6, "learning_rate": 0.00018761611137373173, "loss": 2.1488, "step": 1342 }, { "epoch": 1.6, "learning_rate": 0.00018759790434263744, "loss": 1.9842, "step": 1343 }, { "epoch": 1.6, "learning_rate": 0.00018757968482199276, "loss": 1.9775, "step": 1344 }, { "epoch": 1.6, "learning_rate": 0.00018756145281439545, "loss": 1.6835, "step": 1345 }, { "epoch": 1.6, "learning_rate": 0.0001875432083224449, "loss": 1.5272, "step": 1346 }, { "epoch": 1.6, "learning_rate": 0.0001875249513487425, "loss": 1.7539, "step": 1347 }, { "epoch": 1.6, "learning_rate": 0.00018750668189589117, "loss": 1.874, "step": 1348 }, { "epoch": 1.6, "learning_rate": 0.00018748839996649583, "loss": 1.5858, "step": 1349 }, { "epoch": 1.61, "learning_rate": 0.00018747010556316305, "loss": 1.9298, "step": 1350 }, { "epoch": 1.61, "learning_rate": 0.0001874517986885012, "loss": 1.5079, "step": 1351 }, { "epoch": 1.61, "learning_rate": 0.00018743347934512046, "loss": 1.884, "step": 1352 }, { "epoch": 1.61, "learning_rate": 0.00018741514753563277, "loss": 1.7978, "step": 1353 }, { "epoch": 1.61, "learning_rate": 0.0001873968032626518, "loss": 1.7735, "step": 1354 }, { "epoch": 1.61, "learning_rate": 0.00018737844652879312, "loss": 1.7227, "step": 1355 }, { "epoch": 1.61, "learning_rate": 0.00018736007733667393, "loss": 1.8458, "step": 1356 }, { "epoch": 1.61, "learning_rate": 0.00018734169568891334, "loss": 1.3268, "step": 1357 }, { "epoch": 1.61, "learning_rate": 0.0001873233015881321, "loss": 1.3782, "step": 1358 }, { "epoch": 1.62, "learning_rate": 0.00018730489503695287, "loss": 1.9614, "step": 1359 }, { "epoch": 1.62, "learning_rate": 0.00018728647603800003, "loss": 1.7755, "step": 1360 }, { "epoch": 1.62, "learning_rate": 0.00018726804459389963, "loss": 1.7961, "step": 1361 }, { "epoch": 1.62, "learning_rate": 0.00018724960070727972, "loss": 1.7158, "step": 1362 }, { "epoch": 1.62, "learning_rate": 0.0001872311443807699, "loss": 1.6303, "step": 1363 }, { "epoch": 1.62, "learning_rate": 0.0001872126756170017, "loss": 1.8734, "step": 1364 }, { "epoch": 1.62, "learning_rate": 0.00018719419441860834, "loss": 1.5143, "step": 1365 }, { "epoch": 1.62, "learning_rate": 0.0001871757007882248, "loss": 1.498, "step": 1366 }, { "epoch": 1.63, "learning_rate": 0.0001871571947284879, "loss": 1.0886, "step": 1367 }, { "epoch": 1.63, "learning_rate": 0.00018713867624203621, "loss": 1.6633, "step": 1368 }, { "epoch": 1.63, "learning_rate": 0.00018712014533151008, "loss": 1.8895, "step": 1369 }, { "epoch": 1.63, "learning_rate": 0.00018710160199955156, "loss": 1.4178, "step": 1370 }, { "epoch": 1.63, "learning_rate": 0.00018708304624880456, "loss": 1.6814, "step": 1371 }, { "epoch": 1.63, "learning_rate": 0.0001870644780819147, "loss": 1.8671, "step": 1372 }, { "epoch": 1.63, "learning_rate": 0.00018704589750152944, "loss": 1.4786, "step": 1373 }, { "epoch": 1.63, "learning_rate": 0.00018702730451029796, "loss": 1.8622, "step": 1374 }, { "epoch": 1.64, "learning_rate": 0.00018700869911087115, "loss": 1.8891, "step": 1375 }, { "epoch": 1.64, "learning_rate": 0.0001869900813059018, "loss": 2.0493, "step": 1376 }, { "epoch": 1.64, "learning_rate": 0.00018697145109804436, "loss": 1.7238, "step": 1377 }, { "epoch": 1.64, "learning_rate": 0.00018695280848995513, "loss": 1.7826, "step": 1378 }, { "epoch": 1.64, "learning_rate": 0.0001869341534842921, "loss": 1.8557, "step": 1379 }, { "epoch": 1.64, "learning_rate": 0.0001869154860837151, "loss": 1.7492, "step": 1380 }, { "epoch": 1.64, "learning_rate": 0.0001868968062908857, "loss": 1.7441, "step": 1381 }, { "epoch": 1.64, "learning_rate": 0.0001868781141084672, "loss": 1.8322, "step": 1382 }, { "epoch": 1.64, "learning_rate": 0.0001868594095391247, "loss": 1.8177, "step": 1383 }, { "epoch": 1.65, "learning_rate": 0.00018684069258552508, "loss": 2.0001, "step": 1384 }, { "epoch": 1.65, "learning_rate": 0.00018682196325033696, "loss": 1.5046, "step": 1385 }, { "epoch": 1.65, "learning_rate": 0.00018680322153623075, "loss": 1.6789, "step": 1386 }, { "epoch": 1.65, "learning_rate": 0.0001867844674458786, "loss": 1.6951, "step": 1387 }, { "epoch": 1.65, "learning_rate": 0.00018676570098195443, "loss": 2.0334, "step": 1388 }, { "epoch": 1.65, "learning_rate": 0.00018674692214713388, "loss": 1.7833, "step": 1389 }, { "epoch": 1.65, "learning_rate": 0.0001867281309440945, "loss": 1.82, "step": 1390 }, { "epoch": 1.65, "learning_rate": 0.00018670932737551547, "loss": 1.8155, "step": 1391 }, { "epoch": 1.66, "learning_rate": 0.00018669051144407775, "loss": 1.7912, "step": 1392 }, { "epoch": 1.66, "learning_rate": 0.00018667168315246406, "loss": 1.5816, "step": 1393 }, { "epoch": 1.66, "learning_rate": 0.00018665284250335895, "loss": 1.7521, "step": 1394 }, { "epoch": 1.66, "learning_rate": 0.00018663398949944865, "loss": 1.4287, "step": 1395 }, { "epoch": 1.66, "learning_rate": 0.00018661512414342127, "loss": 1.6026, "step": 1396 }, { "epoch": 1.66, "learning_rate": 0.00018659624643796647, "loss": 1.6953, "step": 1397 }, { "epoch": 1.66, "learning_rate": 0.00018657735638577587, "loss": 1.8515, "step": 1398 }, { "epoch": 1.66, "learning_rate": 0.00018655845398954276, "loss": 2.0384, "step": 1399 }, { "epoch": 1.67, "learning_rate": 0.00018653953925196225, "loss": 1.5458, "step": 1400 }, { "epoch": 1.67, "learning_rate": 0.00018652061217573114, "loss": 1.7166, "step": 1401 }, { "epoch": 1.67, "learning_rate": 0.000186501672763548, "loss": 1.5653, "step": 1402 }, { "epoch": 1.67, "learning_rate": 0.00018648272101811318, "loss": 2.0928, "step": 1403 }, { "epoch": 1.67, "learning_rate": 0.00018646375694212884, "loss": 1.605, "step": 1404 }, { "epoch": 1.67, "learning_rate": 0.00018644478053829878, "loss": 1.4734, "step": 1405 }, { "epoch": 1.67, "learning_rate": 0.00018642579180932865, "loss": 2.0578, "step": 1406 }, { "epoch": 1.67, "learning_rate": 0.00018640679075792582, "loss": 1.9823, "step": 1407 }, { "epoch": 1.67, "learning_rate": 0.00018638777738679943, "loss": 2.0551, "step": 1408 }, { "epoch": 1.68, "learning_rate": 0.00018636875169866036, "loss": 1.6315, "step": 1409 }, { "epoch": 1.68, "learning_rate": 0.0001863497136962213, "loss": 1.8965, "step": 1410 }, { "epoch": 1.68, "learning_rate": 0.0001863306633821966, "loss": 1.3584, "step": 1411 }, { "epoch": 1.68, "learning_rate": 0.00018631160075930245, "loss": 1.9673, "step": 1412 }, { "epoch": 1.68, "learning_rate": 0.00018629252583025676, "loss": 1.5277, "step": 1413 }, { "epoch": 1.68, "learning_rate": 0.0001862734385977792, "loss": 1.6788, "step": 1414 }, { "epoch": 1.68, "learning_rate": 0.00018625433906459116, "loss": 1.432, "step": 1415 }, { "epoch": 1.68, "learning_rate": 0.00018623522723341588, "loss": 1.8102, "step": 1416 }, { "epoch": 1.69, "learning_rate": 0.00018621610310697823, "loss": 1.6713, "step": 1417 }, { "epoch": 1.69, "learning_rate": 0.00018619696668800492, "loss": 1.6989, "step": 1418 }, { "epoch": 1.69, "learning_rate": 0.0001861778179792244, "loss": 1.7645, "step": 1419 }, { "epoch": 1.69, "learning_rate": 0.00018615865698336684, "loss": 1.594, "step": 1420 }, { "epoch": 1.69, "learning_rate": 0.00018613948370316415, "loss": 1.8751, "step": 1421 }, { "epoch": 1.69, "learning_rate": 0.00018612029814135014, "loss": 1.64, "step": 1422 }, { "epoch": 1.69, "learning_rate": 0.00018610110030066007, "loss": 1.5066, "step": 1423 }, { "epoch": 1.69, "learning_rate": 0.0001860818901838313, "loss": 1.9817, "step": 1424 }, { "epoch": 1.7, "learning_rate": 0.00018606266779360266, "loss": 2.056, "step": 1425 }, { "epoch": 1.7, "learning_rate": 0.0001860434331327149, "loss": 1.6997, "step": 1426 }, { "epoch": 1.7, "learning_rate": 0.00018602418620391044, "loss": 1.5573, "step": 1427 }, { "epoch": 1.7, "learning_rate": 0.0001860049270099335, "loss": 1.8427, "step": 1428 }, { "epoch": 1.7, "learning_rate": 0.00018598565555353, "loss": 2.012, "step": 1429 }, { "epoch": 1.7, "learning_rate": 0.00018596637183744763, "loss": 1.7976, "step": 1430 }, { "epoch": 1.7, "learning_rate": 0.00018594707586443585, "loss": 1.4, "step": 1431 }, { "epoch": 1.7, "learning_rate": 0.0001859277676372458, "loss": 1.8717, "step": 1432 }, { "epoch": 1.7, "learning_rate": 0.00018590844715863045, "loss": 1.4311, "step": 1433 }, { "epoch": 1.71, "learning_rate": 0.00018588911443134448, "loss": 1.5903, "step": 1434 }, { "epoch": 1.71, "learning_rate": 0.00018586976945814425, "loss": 2.0898, "step": 1435 }, { "epoch": 1.71, "learning_rate": 0.00018585041224178803, "loss": 1.5302, "step": 1436 }, { "epoch": 1.71, "learning_rate": 0.00018583104278503568, "loss": 1.9582, "step": 1437 }, { "epoch": 1.71, "learning_rate": 0.00018581166109064886, "loss": 1.5264, "step": 1438 }, { "epoch": 1.71, "learning_rate": 0.00018579226716139096, "loss": 1.6551, "step": 1439 }, { "epoch": 1.71, "learning_rate": 0.00018577286100002723, "loss": 1.7774, "step": 1440 }, { "epoch": 1.71, "learning_rate": 0.00018575344260932444, "loss": 1.8316, "step": 1441 }, { "epoch": 1.72, "learning_rate": 0.0001857340119920513, "loss": 1.3916, "step": 1442 }, { "epoch": 1.72, "learning_rate": 0.00018571456915097818, "loss": 1.6728, "step": 1443 }, { "epoch": 1.72, "learning_rate": 0.0001856951140888772, "loss": 1.7247, "step": 1444 }, { "epoch": 1.72, "learning_rate": 0.00018567564680852224, "loss": 1.4539, "step": 1445 }, { "epoch": 1.72, "learning_rate": 0.00018565616731268888, "loss": 1.613, "step": 1446 }, { "epoch": 1.72, "learning_rate": 0.0001856366756041545, "loss": 1.757, "step": 1447 }, { "epoch": 1.72, "learning_rate": 0.00018561717168569816, "loss": 1.6903, "step": 1448 }, { "epoch": 1.72, "learning_rate": 0.00018559765556010072, "loss": 1.7322, "step": 1449 }, { "epoch": 1.73, "learning_rate": 0.00018557812723014476, "loss": 1.5627, "step": 1450 }, { "epoch": 1.73, "learning_rate": 0.00018555858669861458, "loss": 1.8751, "step": 1451 }, { "epoch": 1.73, "learning_rate": 0.00018553903396829625, "loss": 1.2721, "step": 1452 }, { "epoch": 1.73, "learning_rate": 0.00018551946904197752, "loss": 1.8167, "step": 1453 }, { "epoch": 1.73, "learning_rate": 0.00018549989192244797, "loss": 1.6602, "step": 1454 }, { "epoch": 1.73, "learning_rate": 0.00018548030261249885, "loss": 1.9053, "step": 1455 }, { "epoch": 1.73, "learning_rate": 0.00018546070111492315, "loss": 1.7721, "step": 1456 }, { "epoch": 1.73, "learning_rate": 0.00018544108743251566, "loss": 2.1421, "step": 1457 }, { "epoch": 1.73, "learning_rate": 0.00018542146156807284, "loss": 1.5076, "step": 1458 }, { "epoch": 1.74, "learning_rate": 0.00018540182352439288, "loss": 1.9039, "step": 1459 }, { "epoch": 1.74, "learning_rate": 0.00018538217330427582, "loss": 1.9777, "step": 1460 }, { "epoch": 1.74, "learning_rate": 0.00018536251091052323, "loss": 1.5702, "step": 1461 }, { "epoch": 1.74, "learning_rate": 0.00018534283634593862, "loss": 1.851, "step": 1462 }, { "epoch": 1.74, "learning_rate": 0.00018532314961332717, "loss": 1.5337, "step": 1463 }, { "epoch": 1.74, "eval_loss": 2.068387508392334, "eval_runtime": 283.4638, "eval_samples_per_second": 0.727, "eval_steps_per_second": 0.727, "step": 1463 }, { "epoch": 1.74, "learning_rate": 0.00018530345071549574, "loss": 1.7553, "step": 1464 }, { "epoch": 1.74, "learning_rate": 0.00018528373965525296, "loss": 1.4175, "step": 1465 }, { "epoch": 1.74, "learning_rate": 0.00018526401643540922, "loss": 1.7216, "step": 1466 }, { "epoch": 1.75, "learning_rate": 0.00018524428105877664, "loss": 1.6415, "step": 1467 }, { "epoch": 1.75, "learning_rate": 0.00018522453352816896, "loss": 1.7284, "step": 1468 }, { "epoch": 1.75, "learning_rate": 0.00018520477384640187, "loss": 1.8314, "step": 1469 }, { "epoch": 1.75, "learning_rate": 0.00018518500201629258, "loss": 1.8341, "step": 1470 }, { "epoch": 1.75, "learning_rate": 0.00018516521804066015, "loss": 1.4129, "step": 1471 }, { "epoch": 1.75, "learning_rate": 0.00018514542192232537, "loss": 1.4671, "step": 1472 }, { "epoch": 1.75, "learning_rate": 0.00018512561366411067, "loss": 1.6665, "step": 1473 }, { "epoch": 1.75, "learning_rate": 0.00018510579326884034, "loss": 1.5722, "step": 1474 }, { "epoch": 1.76, "learning_rate": 0.0001850859607393403, "loss": 1.9348, "step": 1475 }, { "epoch": 1.76, "learning_rate": 0.0001850661160784383, "loss": 1.5404, "step": 1476 }, { "epoch": 1.76, "learning_rate": 0.00018504625928896363, "loss": 1.4769, "step": 1477 }, { "epoch": 1.76, "learning_rate": 0.00018502639037374757, "loss": 1.4149, "step": 1478 }, { "epoch": 1.76, "learning_rate": 0.0001850065093356229, "loss": 1.958, "step": 1479 }, { "epoch": 1.76, "learning_rate": 0.00018498661617742426, "loss": 1.8319, "step": 1480 }, { "epoch": 1.76, "learning_rate": 0.00018496671090198797, "loss": 1.5948, "step": 1481 }, { "epoch": 1.76, "learning_rate": 0.0001849467935121521, "loss": 1.8469, "step": 1482 }, { "epoch": 1.76, "learning_rate": 0.00018492686401075644, "loss": 1.6798, "step": 1483 }, { "epoch": 1.77, "learning_rate": 0.0001849069224006425, "loss": 1.8197, "step": 1484 }, { "epoch": 1.77, "learning_rate": 0.0001848869686846535, "loss": 1.6613, "step": 1485 }, { "epoch": 1.77, "learning_rate": 0.0001848670028656344, "loss": 1.7322, "step": 1486 }, { "epoch": 1.77, "learning_rate": 0.00018484702494643188, "loss": 2.0493, "step": 1487 }, { "epoch": 1.77, "learning_rate": 0.00018482703492989444, "loss": 1.7182, "step": 1488 }, { "epoch": 1.77, "learning_rate": 0.00018480703281887215, "loss": 1.689, "step": 1489 }, { "epoch": 1.77, "learning_rate": 0.00018478701861621686, "loss": 1.9477, "step": 1490 }, { "epoch": 1.77, "learning_rate": 0.0001847669923247822, "loss": 1.8171, "step": 1491 }, { "epoch": 1.78, "learning_rate": 0.00018474695394742345, "loss": 1.7337, "step": 1492 }, { "epoch": 1.78, "learning_rate": 0.0001847269034869977, "loss": 1.6983, "step": 1493 }, { "epoch": 1.78, "learning_rate": 0.0001847068409463636, "loss": 1.6445, "step": 1494 }, { "epoch": 1.78, "learning_rate": 0.0001846867663283818, "loss": 1.9965, "step": 1495 }, { "epoch": 1.78, "learning_rate": 0.0001846666796359143, "loss": 1.6775, "step": 1496 }, { "epoch": 1.78, "learning_rate": 0.0001846465808718252, "loss": 1.8117, "step": 1497 }, { "epoch": 1.78, "learning_rate": 0.00018462647003898006, "loss": 1.8803, "step": 1498 }, { "epoch": 1.78, "learning_rate": 0.00018460634714024624, "loss": 1.3045, "step": 1499 }, { "epoch": 1.79, "learning_rate": 0.00018458621217849286, "loss": 1.7768, "step": 1500 }, { "epoch": 1.79, "learning_rate": 0.00018456606515659073, "loss": 2.0641, "step": 1501 }, { "epoch": 1.79, "learning_rate": 0.0001845459060774123, "loss": 1.3804, "step": 1502 }, { "epoch": 1.79, "learning_rate": 0.00018452573494383192, "loss": 1.6271, "step": 1503 }, { "epoch": 1.79, "learning_rate": 0.00018450555175872547, "loss": 1.8525, "step": 1504 }, { "epoch": 1.79, "learning_rate": 0.00018448535652497073, "loss": 1.5303, "step": 1505 }, { "epoch": 1.79, "learning_rate": 0.000184465149245447, "loss": 2.0368, "step": 1506 }, { "epoch": 1.79, "learning_rate": 0.00018444492992303544, "loss": 1.9951, "step": 1507 }, { "epoch": 1.79, "learning_rate": 0.0001844246985606189, "loss": 1.8715, "step": 1508 }, { "epoch": 1.8, "learning_rate": 0.00018440445516108186, "loss": 1.7373, "step": 1509 }, { "epoch": 1.8, "learning_rate": 0.00018438419972731067, "loss": 1.7667, "step": 1510 }, { "epoch": 1.8, "learning_rate": 0.00018436393226219327, "loss": 1.5134, "step": 1511 }, { "epoch": 1.8, "learning_rate": 0.00018434365276861938, "loss": 1.3891, "step": 1512 }, { "epoch": 1.8, "learning_rate": 0.0001843233612494804, "loss": 1.7066, "step": 1513 }, { "epoch": 1.8, "learning_rate": 0.00018430305770766948, "loss": 1.6366, "step": 1514 }, { "epoch": 1.8, "learning_rate": 0.0001842827421460814, "loss": 1.7838, "step": 1515 }, { "epoch": 1.8, "learning_rate": 0.0001842624145676128, "loss": 1.7884, "step": 1516 }, { "epoch": 1.81, "learning_rate": 0.0001842420749751619, "loss": 1.8428, "step": 1517 }, { "epoch": 1.81, "learning_rate": 0.00018422172337162867, "loss": 1.4987, "step": 1518 }, { "epoch": 1.81, "learning_rate": 0.00018420135975991483, "loss": 1.7576, "step": 1519 }, { "epoch": 1.81, "learning_rate": 0.0001841809841429238, "loss": 1.8522, "step": 1520 }, { "epoch": 1.81, "learning_rate": 0.00018416059652356066, "loss": 1.9308, "step": 1521 }, { "epoch": 1.81, "learning_rate": 0.00018414019690473227, "loss": 1.4658, "step": 1522 }, { "epoch": 1.81, "learning_rate": 0.00018411978528934717, "loss": 1.7072, "step": 1523 }, { "epoch": 1.81, "learning_rate": 0.0001840993616803156, "loss": 1.736, "step": 1524 }, { "epoch": 1.82, "learning_rate": 0.0001840789260805495, "loss": 1.7712, "step": 1525 }, { "epoch": 1.82, "learning_rate": 0.0001840584784929626, "loss": 1.2231, "step": 1526 }, { "epoch": 1.82, "learning_rate": 0.00018403801892047023, "loss": 1.8421, "step": 1527 }, { "epoch": 1.82, "learning_rate": 0.00018401754736598947, "loss": 1.2689, "step": 1528 }, { "epoch": 1.82, "learning_rate": 0.00018399706383243918, "loss": 1.8062, "step": 1529 }, { "epoch": 1.82, "learning_rate": 0.0001839765683227398, "loss": 1.6846, "step": 1530 }, { "epoch": 1.82, "learning_rate": 0.0001839560608398136, "loss": 1.8201, "step": 1531 }, { "epoch": 1.82, "learning_rate": 0.00018393554138658441, "loss": 1.6958, "step": 1532 }, { "epoch": 1.82, "learning_rate": 0.00018391500996597796, "loss": 1.8487, "step": 1533 }, { "epoch": 1.83, "learning_rate": 0.0001838944665809215, "loss": 1.9788, "step": 1534 }, { "epoch": 1.83, "learning_rate": 0.00018387391123434412, "loss": 1.6002, "step": 1535 }, { "epoch": 1.83, "learning_rate": 0.00018385334392917658, "loss": 1.3859, "step": 1536 }, { "epoch": 1.83, "learning_rate": 0.00018383276466835127, "loss": 2.0743, "step": 1537 }, { "epoch": 1.83, "learning_rate": 0.00018381217345480235, "loss": 1.8357, "step": 1538 }, { "epoch": 1.83, "learning_rate": 0.00018379157029146573, "loss": 1.7002, "step": 1539 }, { "epoch": 1.83, "learning_rate": 0.00018377095518127897, "loss": 1.3058, "step": 1540 }, { "epoch": 1.83, "learning_rate": 0.00018375032812718124, "loss": 1.8745, "step": 1541 }, { "epoch": 1.84, "learning_rate": 0.00018372968913211364, "loss": 1.7847, "step": 1542 }, { "epoch": 1.84, "learning_rate": 0.00018370903819901874, "loss": 1.8156, "step": 1543 }, { "epoch": 1.84, "learning_rate": 0.00018368837533084095, "loss": 2.0152, "step": 1544 }, { "epoch": 1.84, "learning_rate": 0.00018366770053052634, "loss": 1.5656, "step": 1545 }, { "epoch": 1.84, "learning_rate": 0.00018364701380102266, "loss": 1.5753, "step": 1546 }, { "epoch": 1.84, "learning_rate": 0.00018362631514527947, "loss": 1.3938, "step": 1547 }, { "epoch": 1.84, "learning_rate": 0.00018360560456624788, "loss": 1.9599, "step": 1548 }, { "epoch": 1.84, "learning_rate": 0.00018358488206688075, "loss": 1.8641, "step": 1549 }, { "epoch": 1.85, "learning_rate": 0.00018356414765013267, "loss": 1.8428, "step": 1550 }, { "epoch": 1.85, "learning_rate": 0.00018354340131895998, "loss": 1.6016, "step": 1551 }, { "epoch": 1.85, "learning_rate": 0.00018352264307632056, "loss": 1.5768, "step": 1552 }, { "epoch": 1.85, "learning_rate": 0.00018350187292517415, "loss": 1.5369, "step": 1553 }, { "epoch": 1.85, "learning_rate": 0.0001834810908684821, "loss": 1.9717, "step": 1554 }, { "epoch": 1.85, "learning_rate": 0.00018346029690920746, "loss": 1.943, "step": 1555 }, { "epoch": 1.85, "learning_rate": 0.00018343949105031505, "loss": 1.8166, "step": 1556 }, { "epoch": 1.85, "learning_rate": 0.00018341867329477125, "loss": 1.7149, "step": 1557 }, { "epoch": 1.85, "learning_rate": 0.00018339784364554426, "loss": 1.4657, "step": 1558 }, { "epoch": 1.86, "learning_rate": 0.00018337700210560397, "loss": 1.8693, "step": 1559 }, { "epoch": 1.86, "learning_rate": 0.00018335614867792183, "loss": 1.7656, "step": 1560 }, { "epoch": 1.86, "learning_rate": 0.0001833352833654712, "loss": 1.5123, "step": 1561 }, { "epoch": 1.86, "learning_rate": 0.00018331440617122696, "loss": 1.7884, "step": 1562 }, { "epoch": 1.86, "learning_rate": 0.0001832935170981657, "loss": 1.7309, "step": 1563 }, { "epoch": 1.86, "learning_rate": 0.00018327261614926583, "loss": 1.9628, "step": 1564 }, { "epoch": 1.86, "learning_rate": 0.00018325170332750732, "loss": 1.6409, "step": 1565 }, { "epoch": 1.86, "learning_rate": 0.0001832307786358719, "loss": 1.6093, "step": 1566 }, { "epoch": 1.87, "learning_rate": 0.00018320984207734298, "loss": 1.6111, "step": 1567 }, { "epoch": 1.87, "learning_rate": 0.00018318889365490565, "loss": 2.0085, "step": 1568 }, { "epoch": 1.87, "learning_rate": 0.00018316793337154664, "loss": 2.079, "step": 1569 }, { "epoch": 1.87, "learning_rate": 0.00018314696123025454, "loss": 1.5466, "step": 1570 }, { "epoch": 1.87, "learning_rate": 0.00018312597723401942, "loss": 2.0825, "step": 1571 }, { "epoch": 1.87, "learning_rate": 0.0001831049813858332, "loss": 1.9748, "step": 1572 }, { "epoch": 1.87, "learning_rate": 0.00018308397368868945, "loss": 1.6529, "step": 1573 }, { "epoch": 1.87, "learning_rate": 0.00018306295414558335, "loss": 1.7119, "step": 1574 }, { "epoch": 1.88, "learning_rate": 0.00018304192275951184, "loss": 1.8812, "step": 1575 }, { "epoch": 1.88, "learning_rate": 0.00018302087953347352, "loss": 1.8676, "step": 1576 }, { "epoch": 1.88, "learning_rate": 0.00018299982447046877, "loss": 1.879, "step": 1577 }, { "epoch": 1.88, "learning_rate": 0.00018297875757349952, "loss": 1.6282, "step": 1578 }, { "epoch": 1.88, "learning_rate": 0.00018295767884556947, "loss": 1.735, "step": 1579 }, { "epoch": 1.88, "learning_rate": 0.00018293658828968397, "loss": 1.5796, "step": 1580 }, { "epoch": 1.88, "learning_rate": 0.00018291548590885007, "loss": 1.8258, "step": 1581 }, { "epoch": 1.88, "learning_rate": 0.00018289437170607658, "loss": 1.7531, "step": 1582 }, { "epoch": 1.88, "learning_rate": 0.00018287324568437381, "loss": 1.6265, "step": 1583 }, { "epoch": 1.89, "learning_rate": 0.00018285210784675394, "loss": 1.7997, "step": 1584 }, { "epoch": 1.89, "learning_rate": 0.00018283095819623078, "loss": 1.955, "step": 1585 }, { "epoch": 1.89, "learning_rate": 0.00018280979673581977, "loss": 1.6542, "step": 1586 }, { "epoch": 1.89, "learning_rate": 0.00018278862346853808, "loss": 1.7634, "step": 1587 }, { "epoch": 1.89, "learning_rate": 0.00018276743839740458, "loss": 2.0077, "step": 1588 }, { "epoch": 1.89, "learning_rate": 0.00018274624152543977, "loss": 2.0254, "step": 1589 }, { "epoch": 1.89, "learning_rate": 0.00018272503285566587, "loss": 1.4464, "step": 1590 }, { "epoch": 1.89, "learning_rate": 0.00018270381239110677, "loss": 1.8643, "step": 1591 }, { "epoch": 1.9, "learning_rate": 0.00018268258013478804, "loss": 1.3278, "step": 1592 }, { "epoch": 1.9, "learning_rate": 0.00018266133608973696, "loss": 1.744, "step": 1593 }, { "epoch": 1.9, "learning_rate": 0.00018264008025898248, "loss": 1.5079, "step": 1594 }, { "epoch": 1.9, "learning_rate": 0.00018261881264555516, "loss": 1.9655, "step": 1595 }, { "epoch": 1.9, "learning_rate": 0.0001825975332524873, "loss": 2.0557, "step": 1596 }, { "epoch": 1.9, "learning_rate": 0.000182576242082813, "loss": 1.7174, "step": 1597 }, { "epoch": 1.9, "learning_rate": 0.00018255493913956774, "loss": 1.449, "step": 1598 }, { "epoch": 1.9, "learning_rate": 0.00018253362442578896, "loss": 1.9058, "step": 1599 }, { "epoch": 1.91, "learning_rate": 0.00018251229794451567, "loss": 1.3482, "step": 1600 }, { "epoch": 1.91, "learning_rate": 0.00018249095969878853, "loss": 1.7906, "step": 1601 }, { "epoch": 1.91, "learning_rate": 0.00018246960969164994, "loss": 1.6177, "step": 1602 }, { "epoch": 1.91, "learning_rate": 0.00018244824792614393, "loss": 1.5786, "step": 1603 }, { "epoch": 1.91, "learning_rate": 0.00018242687440531618, "loss": 1.6451, "step": 1604 }, { "epoch": 1.91, "learning_rate": 0.00018240548913221416, "loss": 1.3695, "step": 1605 }, { "epoch": 1.91, "learning_rate": 0.0001823840921098869, "loss": 1.6648, "step": 1606 }, { "epoch": 1.91, "learning_rate": 0.00018236268334138515, "loss": 2.1548, "step": 1607 }, { "epoch": 1.91, "learning_rate": 0.00018234126282976133, "loss": 1.6153, "step": 1608 }, { "epoch": 1.92, "learning_rate": 0.0001823198305780696, "loss": 1.741, "step": 1609 }, { "epoch": 1.92, "learning_rate": 0.00018229838658936564, "loss": 1.7827, "step": 1610 }, { "epoch": 1.92, "learning_rate": 0.00018227693086670697, "loss": 1.7343, "step": 1611 }, { "epoch": 1.92, "learning_rate": 0.00018225546341315261, "loss": 1.8149, "step": 1612 }, { "epoch": 1.92, "learning_rate": 0.0001822339842317635, "loss": 1.5497, "step": 1613 }, { "epoch": 1.92, "learning_rate": 0.00018221249332560198, "loss": 1.7659, "step": 1614 }, { "epoch": 1.92, "learning_rate": 0.0001821909906977322, "loss": 1.8992, "step": 1615 }, { "epoch": 1.92, "learning_rate": 0.00018216947635122, "loss": 1.8682, "step": 1616 }, { "epoch": 1.93, "learning_rate": 0.00018214795028913288, "loss": 1.9774, "step": 1617 }, { "epoch": 1.93, "learning_rate": 0.0001821264125145399, "loss": 1.9441, "step": 1618 }, { "epoch": 1.93, "learning_rate": 0.00018210486303051195, "loss": 2.0314, "step": 1619 }, { "epoch": 1.93, "learning_rate": 0.0001820833018401215, "loss": 1.8234, "step": 1620 }, { "epoch": 1.93, "learning_rate": 0.00018206172894644272, "loss": 1.9478, "step": 1621 }, { "epoch": 1.93, "learning_rate": 0.00018204014435255135, "loss": 1.7894, "step": 1622 }, { "epoch": 1.93, "learning_rate": 0.000182018548061525, "loss": 1.5469, "step": 1623 }, { "epoch": 1.93, "learning_rate": 0.00018199694007644277, "loss": 1.9419, "step": 1624 }, { "epoch": 1.94, "learning_rate": 0.00018197532040038547, "loss": 1.6686, "step": 1625 }, { "epoch": 1.94, "learning_rate": 0.00018195368903643563, "loss": 2.2525, "step": 1626 }, { "epoch": 1.94, "learning_rate": 0.00018193204598767744, "loss": 1.8076, "step": 1627 }, { "epoch": 1.94, "learning_rate": 0.00018191039125719662, "loss": 1.976, "step": 1628 }, { "epoch": 1.94, "learning_rate": 0.00018188872484808076, "loss": 1.6896, "step": 1629 }, { "epoch": 1.94, "learning_rate": 0.00018186704676341898, "loss": 1.6784, "step": 1630 }, { "epoch": 1.94, "learning_rate": 0.00018184535700630213, "loss": 1.9634, "step": 1631 }, { "epoch": 1.94, "learning_rate": 0.00018182365557982264, "loss": 1.7406, "step": 1632 }, { "epoch": 1.94, "learning_rate": 0.00018180194248707473, "loss": 1.7492, "step": 1633 }, { "epoch": 1.95, "learning_rate": 0.00018178021773115414, "loss": 1.7731, "step": 1634 }, { "epoch": 1.95, "learning_rate": 0.00018175848131515837, "loss": 1.6232, "step": 1635 }, { "epoch": 1.95, "learning_rate": 0.0001817367332421866, "loss": 1.7488, "step": 1636 }, { "epoch": 1.95, "learning_rate": 0.0001817149735153396, "loss": 1.3398, "step": 1637 }, { "epoch": 1.95, "learning_rate": 0.00018169320213771983, "loss": 1.4521, "step": 1638 }, { "epoch": 1.95, "learning_rate": 0.00018167141911243145, "loss": 1.6311, "step": 1639 }, { "epoch": 1.95, "learning_rate": 0.00018164962444258014, "loss": 1.8911, "step": 1640 }, { "epoch": 1.95, "learning_rate": 0.00018162781813127346, "loss": 1.9879, "step": 1641 }, { "epoch": 1.96, "learning_rate": 0.0001816060001816205, "loss": 1.5637, "step": 1642 }, { "epoch": 1.96, "learning_rate": 0.00018158417059673196, "loss": 1.7461, "step": 1643 }, { "epoch": 1.96, "learning_rate": 0.0001815623293797203, "loss": 1.6671, "step": 1644 }, { "epoch": 1.96, "learning_rate": 0.0001815404765336996, "loss": 1.2124, "step": 1645 }, { "epoch": 1.96, "learning_rate": 0.0001815186120617856, "loss": 1.6402, "step": 1646 }, { "epoch": 1.96, "learning_rate": 0.0001814967359670957, "loss": 1.8837, "step": 1647 }, { "epoch": 1.96, "learning_rate": 0.00018147484825274893, "loss": 1.8027, "step": 1648 }, { "epoch": 1.96, "learning_rate": 0.00018145294892186605, "loss": 1.7684, "step": 1649 }, { "epoch": 1.97, "learning_rate": 0.0001814310379775694, "loss": 1.8274, "step": 1650 }, { "epoch": 1.97, "learning_rate": 0.000181409115422983, "loss": 1.8292, "step": 1651 }, { "epoch": 1.97, "learning_rate": 0.00018138718126123248, "loss": 1.3492, "step": 1652 }, { "epoch": 1.97, "learning_rate": 0.00018136523549544523, "loss": 1.509, "step": 1653 }, { "epoch": 1.97, "learning_rate": 0.00018134327812875024, "loss": 1.7415, "step": 1654 }, { "epoch": 1.97, "learning_rate": 0.00018132130916427816, "loss": 1.5223, "step": 1655 }, { "epoch": 1.97, "learning_rate": 0.00018129932860516126, "loss": 1.9294, "step": 1656 }, { "epoch": 1.97, "learning_rate": 0.00018127733645453348, "loss": 2.0716, "step": 1657 }, { "epoch": 1.97, "learning_rate": 0.00018125533271553043, "loss": 1.57, "step": 1658 }, { "epoch": 1.98, "learning_rate": 0.00018123331739128938, "loss": 2.2132, "step": 1659 }, { "epoch": 1.98, "learning_rate": 0.00018121129048494922, "loss": 1.9006, "step": 1660 }, { "epoch": 1.98, "learning_rate": 0.00018118925199965048, "loss": 1.9319, "step": 1661 }, { "epoch": 1.98, "learning_rate": 0.00018116720193853543, "loss": 1.8103, "step": 1662 }, { "epoch": 1.98, "learning_rate": 0.00018114514030474787, "loss": 1.7028, "step": 1663 }, { "epoch": 1.98, "learning_rate": 0.00018112306710143334, "loss": 1.802, "step": 1664 }, { "epoch": 1.98, "learning_rate": 0.000181100982331739, "loss": 1.6835, "step": 1665 }, { "epoch": 1.98, "learning_rate": 0.0001810788859988136, "loss": 1.7223, "step": 1666 }, { "epoch": 1.99, "learning_rate": 0.0001810567781058077, "loss": 1.5829, "step": 1667 }, { "epoch": 1.99, "learning_rate": 0.00018103465865587333, "loss": 1.9863, "step": 1668 }, { "epoch": 1.99, "learning_rate": 0.0001810125276521642, "loss": 1.6398, "step": 1669 }, { "epoch": 1.99, "learning_rate": 0.00018099038509783582, "loss": 1.9261, "step": 1670 }, { "epoch": 1.99, "learning_rate": 0.00018096823099604517, "loss": 1.8882, "step": 1671 }, { "epoch": 1.99, "learning_rate": 0.00018094606534995093, "loss": 1.6716, "step": 1672 }, { "epoch": 1.99, "eval_loss": 2.075261354446411, "eval_runtime": 283.9438, "eval_samples_per_second": 0.725, "eval_steps_per_second": 0.725, "step": 1672 }, { "epoch": 1.99, "learning_rate": 0.00018092388816271345, "loss": 1.6688, "step": 1673 }, { "epoch": 1.99, "learning_rate": 0.00018090169943749476, "loss": 1.9127, "step": 1674 }, { "epoch": 2.0, "learning_rate": 0.0001808794991774584, "loss": 1.7214, "step": 1675 }, { "epoch": 2.0, "learning_rate": 0.00018085728738576973, "loss": 1.785, "step": 1676 }, { "epoch": 2.0, "learning_rate": 0.00018083506406559561, "loss": 1.5287, "step": 1677 }, { "epoch": 2.0, "learning_rate": 0.00018081282922010464, "loss": 1.9012, "step": 1678 }, { "epoch": 2.0, "learning_rate": 0.00018079058285246698, "loss": 1.3094, "step": 1679 }, { "epoch": 2.0, "learning_rate": 0.0001807683249658545, "loss": 1.818, "step": 1680 }, { "epoch": 2.0, "learning_rate": 0.0001807460555634407, "loss": 1.9389, "step": 1681 }, { "epoch": 2.0, "learning_rate": 0.0001807237746484007, "loss": 1.4334, "step": 1682 }, { "epoch": 2.0, "learning_rate": 0.00018070148222391126, "loss": 1.5422, "step": 1683 }, { "epoch": 2.01, "learning_rate": 0.0001806791782931508, "loss": 1.7899, "step": 1684 }, { "epoch": 2.01, "learning_rate": 0.0001806568628592994, "loss": 1.6106, "step": 1685 }, { "epoch": 2.01, "learning_rate": 0.00018063453592553872, "loss": 1.9807, "step": 1686 }, { "epoch": 2.01, "learning_rate": 0.0001806121974950521, "loss": 1.1762, "step": 1687 }, { "epoch": 2.01, "learning_rate": 0.00018058984757102456, "loss": 1.8338, "step": 1688 }, { "epoch": 2.01, "learning_rate": 0.0001805674861566426, "loss": 1.5556, "step": 1689 }, { "epoch": 2.0, "learning_rate": 0.0001805451132550946, "loss": 0.87, "step": 1690 }, { "epoch": 2.0, "learning_rate": 0.00018052272886957038, "loss": 1.0386, "step": 1691 }, { "epoch": 2.0, "learning_rate": 0.0001805003330032615, "loss": 0.8153, "step": 1692 }, { "epoch": 2.0, "learning_rate": 0.00018047792565936102, "loss": 1.1745, "step": 1693 }, { "epoch": 2.01, "learning_rate": 0.00018045550684106388, "loss": 1.1584, "step": 1694 }, { "epoch": 2.01, "learning_rate": 0.00018043307655156644, "loss": 1.0742, "step": 1695 }, { "epoch": 2.01, "learning_rate": 0.00018041063479406675, "loss": 1.0537, "step": 1696 }, { "epoch": 2.01, "learning_rate": 0.0001803881815717646, "loss": 1.0239, "step": 1697 }, { "epoch": 2.01, "learning_rate": 0.0001803657168878612, "loss": 0.9182, "step": 1698 }, { "epoch": 2.01, "learning_rate": 0.00018034324074555965, "loss": 1.1856, "step": 1699 }, { "epoch": 2.01, "learning_rate": 0.00018032075314806448, "loss": 1.3285, "step": 1700 }, { "epoch": 2.01, "learning_rate": 0.00018029825409858198, "loss": 1.2912, "step": 1701 }, { "epoch": 2.02, "learning_rate": 0.00018027574360032, "loss": 1.3666, "step": 1702 }, { "epoch": 2.02, "learning_rate": 0.00018025322165648807, "loss": 0.9621, "step": 1703 }, { "epoch": 2.02, "learning_rate": 0.00018023068827029723, "loss": 0.8484, "step": 1704 }, { "epoch": 2.02, "learning_rate": 0.00018020814344496037, "loss": 1.2236, "step": 1705 }, { "epoch": 2.02, "learning_rate": 0.00018018558718369186, "loss": 0.8155, "step": 1706 }, { "epoch": 2.02, "learning_rate": 0.0001801630194897077, "loss": 1.2047, "step": 1707 }, { "epoch": 2.02, "learning_rate": 0.00018014044036622555, "loss": 1.0269, "step": 1708 }, { "epoch": 2.02, "learning_rate": 0.00018011784981646474, "loss": 1.0536, "step": 1709 }, { "epoch": 2.03, "learning_rate": 0.00018009524784364615, "loss": 1.0516, "step": 1710 }, { "epoch": 2.03, "learning_rate": 0.00018007263445099235, "loss": 0.9087, "step": 1711 }, { "epoch": 2.03, "learning_rate": 0.0001800500096417275, "loss": 1.3057, "step": 1712 }, { "epoch": 2.03, "learning_rate": 0.00018002737341907743, "loss": 0.8791, "step": 1713 }, { "epoch": 2.03, "learning_rate": 0.00018000472578626956, "loss": 1.1667, "step": 1714 }, { "epoch": 2.03, "learning_rate": 0.00017998206674653294, "loss": 1.1026, "step": 1715 }, { "epoch": 2.03, "learning_rate": 0.00017995939630309826, "loss": 1.3228, "step": 1716 }, { "epoch": 2.03, "learning_rate": 0.0001799367144591978, "loss": 0.9173, "step": 1717 }, { "epoch": 2.03, "learning_rate": 0.00017991402121806557, "loss": 1.0067, "step": 1718 }, { "epoch": 2.04, "learning_rate": 0.0001798913165829371, "loss": 1.0256, "step": 1719 }, { "epoch": 2.04, "learning_rate": 0.00017986860055704953, "loss": 0.7645, "step": 1720 }, { "epoch": 2.04, "learning_rate": 0.0001798458731436417, "loss": 1.0567, "step": 1721 }, { "epoch": 2.04, "learning_rate": 0.00017982313434595406, "loss": 0.7465, "step": 1722 }, { "epoch": 2.04, "learning_rate": 0.00017980038416722863, "loss": 1.3268, "step": 1723 }, { "epoch": 2.04, "learning_rate": 0.00017977762261070916, "loss": 0.9917, "step": 1724 }, { "epoch": 2.04, "learning_rate": 0.00017975484967964087, "loss": 0.8592, "step": 1725 }, { "epoch": 2.04, "learning_rate": 0.00017973206537727073, "loss": 1.43, "step": 1726 }, { "epoch": 2.05, "learning_rate": 0.00017970926970684725, "loss": 1.3679, "step": 1727 }, { "epoch": 2.05, "learning_rate": 0.00017968646267162063, "loss": 1.2959, "step": 1728 }, { "epoch": 2.05, "learning_rate": 0.00017966364427484267, "loss": 1.0674, "step": 1729 }, { "epoch": 2.05, "learning_rate": 0.00017964081451976672, "loss": 1.1153, "step": 1730 }, { "epoch": 2.05, "learning_rate": 0.00017961797340964783, "loss": 1.0586, "step": 1731 }, { "epoch": 2.05, "learning_rate": 0.00017959512094774266, "loss": 1.2388, "step": 1732 }, { "epoch": 2.05, "learning_rate": 0.00017957225713730949, "loss": 1.257, "step": 1733 }, { "epoch": 2.05, "learning_rate": 0.0001795493819816081, "loss": 1.099, "step": 1734 }, { "epoch": 2.06, "learning_rate": 0.0001795264954839001, "loss": 0.9532, "step": 1735 }, { "epoch": 2.06, "learning_rate": 0.00017950359764744859, "loss": 1.2553, "step": 1736 }, { "epoch": 2.06, "learning_rate": 0.00017948068847551825, "loss": 0.9973, "step": 1737 }, { "epoch": 2.06, "learning_rate": 0.00017945776797137543, "loss": 1.0637, "step": 1738 }, { "epoch": 2.06, "learning_rate": 0.00017943483613828815, "loss": 1.1815, "step": 1739 }, { "epoch": 2.06, "learning_rate": 0.00017941189297952597, "loss": 0.8378, "step": 1740 }, { "epoch": 2.06, "learning_rate": 0.00017938893849836002, "loss": 0.9375, "step": 1741 }, { "epoch": 2.06, "learning_rate": 0.00017936597269806322, "loss": 0.9653, "step": 1742 }, { "epoch": 2.06, "learning_rate": 0.0001793429955819099, "loss": 1.221, "step": 1743 }, { "epoch": 2.07, "learning_rate": 0.00017932000715317612, "loss": 1.041, "step": 1744 }, { "epoch": 2.07, "learning_rate": 0.00017929700741513955, "loss": 1.0724, "step": 1745 }, { "epoch": 2.07, "learning_rate": 0.00017927399637107945, "loss": 1.1102, "step": 1746 }, { "epoch": 2.07, "learning_rate": 0.00017925097402427667, "loss": 0.8542, "step": 1747 }, { "epoch": 2.07, "learning_rate": 0.0001792279403780137, "loss": 1.2339, "step": 1748 }, { "epoch": 2.07, "learning_rate": 0.00017920489543557465, "loss": 0.8671, "step": 1749 }, { "epoch": 2.07, "learning_rate": 0.0001791818392002452, "loss": 0.9779, "step": 1750 }, { "epoch": 2.07, "learning_rate": 0.0001791587716753127, "loss": 1.1242, "step": 1751 }, { "epoch": 2.08, "learning_rate": 0.00017913569286406603, "loss": 0.9043, "step": 1752 }, { "epoch": 2.08, "learning_rate": 0.0001791126027697958, "loss": 0.7996, "step": 1753 }, { "epoch": 2.08, "learning_rate": 0.00017908950139579406, "loss": 0.8602, "step": 1754 }, { "epoch": 2.08, "learning_rate": 0.00017906638874535462, "loss": 1.0161, "step": 1755 }, { "epoch": 2.08, "learning_rate": 0.00017904326482177284, "loss": 0.8226, "step": 1756 }, { "epoch": 2.08, "learning_rate": 0.00017902012962834566, "loss": 1.3885, "step": 1757 }, { "epoch": 2.08, "learning_rate": 0.0001789969831683717, "loss": 1.2158, "step": 1758 }, { "epoch": 2.08, "learning_rate": 0.00017897382544515108, "loss": 1.3261, "step": 1759 }, { "epoch": 2.09, "learning_rate": 0.00017895065646198567, "loss": 1.2144, "step": 1760 }, { "epoch": 2.09, "learning_rate": 0.00017892747622217875, "loss": 0.9881, "step": 1761 }, { "epoch": 2.09, "learning_rate": 0.0001789042847290354, "loss": 1.0342, "step": 1762 }, { "epoch": 2.09, "learning_rate": 0.00017888108198586217, "loss": 0.7883, "step": 1763 }, { "epoch": 2.09, "learning_rate": 0.00017885786799596732, "loss": 0.9006, "step": 1764 }, { "epoch": 2.09, "learning_rate": 0.00017883464276266064, "loss": 1.3695, "step": 1765 }, { "epoch": 2.09, "learning_rate": 0.0001788114062892535, "loss": 1.0303, "step": 1766 }, { "epoch": 2.09, "learning_rate": 0.00017878815857905897, "loss": 1.3816, "step": 1767 }, { "epoch": 2.09, "learning_rate": 0.0001787648996353916, "loss": 0.8684, "step": 1768 }, { "epoch": 2.1, "learning_rate": 0.00017874162946156772, "loss": 1.1157, "step": 1769 }, { "epoch": 2.1, "learning_rate": 0.00017871834806090501, "loss": 1.0087, "step": 1770 }, { "epoch": 2.1, "learning_rate": 0.000178695055436723, "loss": 0.7173, "step": 1771 }, { "epoch": 2.1, "learning_rate": 0.00017867175159234265, "loss": 1.4784, "step": 1772 }, { "epoch": 2.1, "learning_rate": 0.00017864843653108662, "loss": 1.1401, "step": 1773 }, { "epoch": 2.1, "learning_rate": 0.0001786251102562791, "loss": 1.0952, "step": 1774 }, { "epoch": 2.1, "learning_rate": 0.0001786017727712459, "loss": 0.9443, "step": 1775 }, { "epoch": 2.1, "learning_rate": 0.00017857842407931445, "loss": 1.0682, "step": 1776 }, { "epoch": 2.11, "learning_rate": 0.0001785550641838138, "loss": 0.9402, "step": 1777 }, { "epoch": 2.11, "learning_rate": 0.00017853169308807448, "loss": 1.0576, "step": 1778 }, { "epoch": 2.11, "learning_rate": 0.0001785083107954288, "loss": 1.1425, "step": 1779 }, { "epoch": 2.11, "learning_rate": 0.00017848491730921046, "loss": 1.1402, "step": 1780 }, { "epoch": 2.11, "learning_rate": 0.00017846151263275494, "loss": 1.4482, "step": 1781 }, { "epoch": 2.11, "learning_rate": 0.00017843809676939922, "loss": 0.7765, "step": 1782 }, { "epoch": 2.11, "learning_rate": 0.00017841466972248188, "loss": 1.1478, "step": 1783 }, { "epoch": 2.11, "learning_rate": 0.0001783912314953431, "loss": 1.1876, "step": 1784 }, { "epoch": 2.12, "learning_rate": 0.00017836778209132464, "loss": 1.2036, "step": 1785 }, { "epoch": 2.12, "learning_rate": 0.0001783443215137699, "loss": 1.0297, "step": 1786 }, { "epoch": 2.12, "learning_rate": 0.0001783208497660239, "loss": 0.8186, "step": 1787 }, { "epoch": 2.12, "learning_rate": 0.00017829736685143308, "loss": 0.7258, "step": 1788 }, { "epoch": 2.12, "learning_rate": 0.00017827387277334568, "loss": 0.8072, "step": 1789 }, { "epoch": 2.12, "learning_rate": 0.00017825036753511144, "loss": 1.0474, "step": 1790 }, { "epoch": 2.12, "learning_rate": 0.00017822685114008167, "loss": 1.2141, "step": 1791 }, { "epoch": 2.12, "learning_rate": 0.00017820332359160928, "loss": 1.1443, "step": 1792 }, { "epoch": 2.12, "learning_rate": 0.0001781797848930488, "loss": 0.9864, "step": 1793 }, { "epoch": 2.13, "learning_rate": 0.00017815623504775636, "loss": 1.2998, "step": 1794 }, { "epoch": 2.13, "learning_rate": 0.0001781326740590896, "loss": 1.0672, "step": 1795 }, { "epoch": 2.13, "learning_rate": 0.00017810910193040785, "loss": 0.9152, "step": 1796 }, { "epoch": 2.13, "learning_rate": 0.000178085518665072, "loss": 1.2555, "step": 1797 }, { "epoch": 2.13, "learning_rate": 0.00017806192426644444, "loss": 1.2085, "step": 1798 }, { "epoch": 2.13, "learning_rate": 0.00017803831873788926, "loss": 1.6205, "step": 1799 }, { "epoch": 2.13, "learning_rate": 0.0001780147020827721, "loss": 1.3382, "step": 1800 }, { "epoch": 2.13, "learning_rate": 0.00017799107430446016, "loss": 1.3309, "step": 1801 }, { "epoch": 2.14, "learning_rate": 0.00017796743540632223, "loss": 1.2556, "step": 1802 }, { "epoch": 2.14, "learning_rate": 0.00017794378539172877, "loss": 0.829, "step": 1803 }, { "epoch": 2.14, "learning_rate": 0.00017792012426405166, "loss": 1.1711, "step": 1804 }, { "epoch": 2.14, "learning_rate": 0.00017789645202666456, "loss": 1.0128, "step": 1805 }, { "epoch": 2.14, "learning_rate": 0.00017787276868294253, "loss": 1.2074, "step": 1806 }, { "epoch": 2.14, "learning_rate": 0.00017784907423626237, "loss": 1.0996, "step": 1807 }, { "epoch": 2.14, "learning_rate": 0.0001778253686900023, "loss": 0.9608, "step": 1808 }, { "epoch": 2.14, "learning_rate": 0.0001778016520475423, "loss": 0.827, "step": 1809 }, { "epoch": 2.15, "learning_rate": 0.00017777792431226383, "loss": 1.2365, "step": 1810 }, { "epoch": 2.15, "learning_rate": 0.00017775418548754993, "loss": 1.0276, "step": 1811 }, { "epoch": 2.15, "learning_rate": 0.0001777304355767852, "loss": 0.8178, "step": 1812 }, { "epoch": 2.15, "learning_rate": 0.0001777066745833559, "loss": 1.1297, "step": 1813 }, { "epoch": 2.15, "learning_rate": 0.00017768290251064987, "loss": 1.1737, "step": 1814 }, { "epoch": 2.15, "learning_rate": 0.00017765911936205644, "loss": 1.1606, "step": 1815 }, { "epoch": 2.15, "learning_rate": 0.00017763532514096658, "loss": 1.2605, "step": 1816 }, { "epoch": 2.15, "learning_rate": 0.0001776115198507728, "loss": 1.2271, "step": 1817 }, { "epoch": 2.15, "learning_rate": 0.00017758770349486923, "loss": 0.9407, "step": 1818 }, { "epoch": 2.16, "learning_rate": 0.0001775638760766516, "loss": 1.0273, "step": 1819 }, { "epoch": 2.16, "learning_rate": 0.00017754003759951715, "loss": 1.0746, "step": 1820 }, { "epoch": 2.16, "learning_rate": 0.00017751618806686472, "loss": 1.0091, "step": 1821 }, { "epoch": 2.16, "learning_rate": 0.00017749232748209473, "loss": 0.997, "step": 1822 }, { "epoch": 2.16, "learning_rate": 0.0001774684558486092, "loss": 1.4814, "step": 1823 }, { "epoch": 2.16, "learning_rate": 0.00017744457316981168, "loss": 1.1407, "step": 1824 }, { "epoch": 2.16, "learning_rate": 0.00017742067944910737, "loss": 0.9824, "step": 1825 }, { "epoch": 2.16, "learning_rate": 0.00017739677468990293, "loss": 1.2603, "step": 1826 }, { "epoch": 2.17, "learning_rate": 0.00017737285889560668, "loss": 1.3721, "step": 1827 }, { "epoch": 2.17, "learning_rate": 0.00017734893206962853, "loss": 1.1186, "step": 1828 }, { "epoch": 2.17, "learning_rate": 0.00017732499421537984, "loss": 0.7693, "step": 1829 }, { "epoch": 2.17, "learning_rate": 0.0001773010453362737, "loss": 1.0449, "step": 1830 }, { "epoch": 2.17, "learning_rate": 0.00017727708543572467, "loss": 0.9331, "step": 1831 }, { "epoch": 2.17, "learning_rate": 0.0001772531145171489, "loss": 0.739, "step": 1832 }, { "epoch": 2.17, "learning_rate": 0.00017722913258396417, "loss": 0.9076, "step": 1833 }, { "epoch": 2.17, "learning_rate": 0.00017720513963958968, "loss": 1.3464, "step": 1834 }, { "epoch": 2.18, "learning_rate": 0.00017718113568744638, "loss": 0.8858, "step": 1835 }, { "epoch": 2.18, "learning_rate": 0.00017715712073095672, "loss": 1.3204, "step": 1836 }, { "epoch": 2.18, "learning_rate": 0.00017713309477354467, "loss": 1.0538, "step": 1837 }, { "epoch": 2.18, "learning_rate": 0.0001771090578186358, "loss": 1.44, "step": 1838 }, { "epoch": 2.18, "learning_rate": 0.0001770850098696573, "loss": 1.0167, "step": 1839 }, { "epoch": 2.18, "learning_rate": 0.00017706095093003785, "loss": 0.9724, "step": 1840 }, { "epoch": 2.18, "learning_rate": 0.00017703688100320774, "loss": 0.8055, "step": 1841 }, { "epoch": 2.18, "learning_rate": 0.0001770128000925988, "loss": 0.7363, "step": 1842 }, { "epoch": 2.18, "learning_rate": 0.00017698870820164446, "loss": 1.1329, "step": 1843 }, { "epoch": 2.19, "learning_rate": 0.00017696460533377968, "loss": 0.9487, "step": 1844 }, { "epoch": 2.19, "learning_rate": 0.00017694049149244104, "loss": 1.2571, "step": 1845 }, { "epoch": 2.19, "learning_rate": 0.0001769163666810666, "loss": 0.9148, "step": 1846 }, { "epoch": 2.19, "learning_rate": 0.00017689223090309607, "loss": 1.4676, "step": 1847 }, { "epoch": 2.19, "learning_rate": 0.00017686808416197072, "loss": 0.9395, "step": 1848 }, { "epoch": 2.19, "learning_rate": 0.00017684392646113325, "loss": 0.9632, "step": 1849 }, { "epoch": 2.19, "learning_rate": 0.00017681975780402807, "loss": 1.0037, "step": 1850 }, { "epoch": 2.19, "learning_rate": 0.0001767955781941011, "loss": 0.9557, "step": 1851 }, { "epoch": 2.2, "learning_rate": 0.00017677138763479985, "loss": 1.2799, "step": 1852 }, { "epoch": 2.2, "learning_rate": 0.00017674718612957336, "loss": 0.8461, "step": 1853 }, { "epoch": 2.2, "learning_rate": 0.0001767229736818722, "loss": 1.2762, "step": 1854 }, { "epoch": 2.2, "learning_rate": 0.00017669875029514856, "loss": 1.4801, "step": 1855 }, { "epoch": 2.2, "learning_rate": 0.00017667451597285617, "loss": 0.9849, "step": 1856 }, { "epoch": 2.2, "learning_rate": 0.0001766502707184503, "loss": 1.0875, "step": 1857 }, { "epoch": 2.2, "learning_rate": 0.00017662601453538783, "loss": 0.8346, "step": 1858 }, { "epoch": 2.2, "learning_rate": 0.0001766017474271271, "loss": 1.1933, "step": 1859 }, { "epoch": 2.21, "learning_rate": 0.00017657746939712815, "loss": 0.8789, "step": 1860 }, { "epoch": 2.21, "learning_rate": 0.00017655318044885245, "loss": 1.0091, "step": 1861 }, { "epoch": 2.21, "learning_rate": 0.0001765288805857631, "loss": 0.7371, "step": 1862 }, { "epoch": 2.21, "learning_rate": 0.00017650456981132466, "loss": 0.8131, "step": 1863 }, { "epoch": 2.21, "learning_rate": 0.00017648024812900342, "loss": 1.0795, "step": 1864 }, { "epoch": 2.21, "learning_rate": 0.000176455915542267, "loss": 0.9882, "step": 1865 }, { "epoch": 2.21, "learning_rate": 0.00017643157205458483, "loss": 1.212, "step": 1866 }, { "epoch": 2.21, "learning_rate": 0.00017640721766942768, "loss": 1.4755, "step": 1867 }, { "epoch": 2.21, "learning_rate": 0.00017638285239026798, "loss": 1.0391, "step": 1868 }, { "epoch": 2.22, "learning_rate": 0.00017635847622057965, "loss": 1.2568, "step": 1869 }, { "epoch": 2.22, "learning_rate": 0.00017633408916383826, "loss": 1.2138, "step": 1870 }, { "epoch": 2.22, "learning_rate": 0.0001763096912235208, "loss": 1.196, "step": 1871 }, { "epoch": 2.22, "learning_rate": 0.00017628528240310596, "loss": 1.1476, "step": 1872 }, { "epoch": 2.22, "learning_rate": 0.00017626086270607384, "loss": 1.1421, "step": 1873 }, { "epoch": 2.22, "learning_rate": 0.00017623643213590619, "loss": 1.0711, "step": 1874 }, { "epoch": 2.22, "learning_rate": 0.0001762119906960863, "loss": 0.8842, "step": 1875 }, { "epoch": 2.22, "learning_rate": 0.00017618753839009893, "loss": 0.798, "step": 1876 }, { "epoch": 2.23, "learning_rate": 0.0001761630752214305, "loss": 0.8591, "step": 1877 }, { "epoch": 2.23, "learning_rate": 0.00017613860119356883, "loss": 0.7646, "step": 1878 }, { "epoch": 2.23, "learning_rate": 0.0001761141163100035, "loss": 1.4113, "step": 1879 }, { "epoch": 2.23, "learning_rate": 0.00017608962057422549, "loss": 0.8605, "step": 1880 }, { "epoch": 2.23, "learning_rate": 0.00017606511398972731, "loss": 0.6179, "step": 1881 }, { "epoch": 2.23, "eval_loss": 2.3971996307373047, "eval_runtime": 283.7444, "eval_samples_per_second": 0.726, "eval_steps_per_second": 0.726, "step": 1881 }, { "epoch": 2.23, "learning_rate": 0.0001760405965600031, "loss": 0.8651, "step": 1882 }, { "epoch": 2.23, "learning_rate": 0.0001760160682885485, "loss": 1.3178, "step": 1883 }, { "epoch": 2.23, "learning_rate": 0.00017599152917886071, "loss": 0.9233, "step": 1884 }, { "epoch": 2.24, "learning_rate": 0.00017596697923443847, "loss": 0.9126, "step": 1885 }, { "epoch": 2.24, "learning_rate": 0.0001759424184587821, "loss": 0.9749, "step": 1886 }, { "epoch": 2.24, "learning_rate": 0.00017591784685539334, "loss": 1.1929, "step": 1887 }, { "epoch": 2.24, "learning_rate": 0.00017589326442777565, "loss": 1.2026, "step": 1888 }, { "epoch": 2.24, "learning_rate": 0.00017586867117943392, "loss": 1.1162, "step": 1889 }, { "epoch": 2.24, "learning_rate": 0.00017584406711387463, "loss": 0.9818, "step": 1890 }, { "epoch": 2.24, "learning_rate": 0.0001758194522346057, "loss": 0.9802, "step": 1891 }, { "epoch": 2.24, "learning_rate": 0.0001757948265451368, "loss": 0.8963, "step": 1892 }, { "epoch": 2.24, "learning_rate": 0.00017577019004897897, "loss": 1.0359, "step": 1893 }, { "epoch": 2.25, "learning_rate": 0.00017574554274964478, "loss": 1.0788, "step": 1894 }, { "epoch": 2.25, "learning_rate": 0.00017572088465064848, "loss": 0.9415, "step": 1895 }, { "epoch": 2.25, "learning_rate": 0.0001756962157555057, "loss": 1.0944, "step": 1896 }, { "epoch": 2.25, "learning_rate": 0.00017567153606773373, "loss": 1.357, "step": 1897 }, { "epoch": 2.25, "learning_rate": 0.00017564684559085136, "loss": 1.0108, "step": 1898 }, { "epoch": 2.25, "learning_rate": 0.0001756221443283789, "loss": 0.5337, "step": 1899 }, { "epoch": 2.25, "learning_rate": 0.0001755974322838382, "loss": 1.4234, "step": 1900 }, { "epoch": 2.25, "learning_rate": 0.0001755727094607527, "loss": 0.9083, "step": 1901 }, { "epoch": 2.26, "learning_rate": 0.00017554797586264727, "loss": 0.9199, "step": 1902 }, { "epoch": 2.26, "learning_rate": 0.00017552323149304844, "loss": 1.1885, "step": 1903 }, { "epoch": 2.26, "learning_rate": 0.0001754984763554842, "loss": 1.276, "step": 1904 }, { "epoch": 2.26, "learning_rate": 0.0001754737104534841, "loss": 0.8882, "step": 1905 }, { "epoch": 2.26, "learning_rate": 0.00017544893379057918, "loss": 0.993, "step": 1906 }, { "epoch": 2.26, "learning_rate": 0.0001754241463703021, "loss": 1.261, "step": 1907 }, { "epoch": 2.26, "learning_rate": 0.00017539934819618696, "loss": 0.9877, "step": 1908 }, { "epoch": 2.26, "learning_rate": 0.00017537453927176947, "loss": 0.9991, "step": 1909 }, { "epoch": 2.27, "learning_rate": 0.00017534971960058685, "loss": 1.2012, "step": 1910 }, { "epoch": 2.27, "learning_rate": 0.0001753248891861778, "loss": 0.864, "step": 1911 }, { "epoch": 2.27, "learning_rate": 0.00017530004803208263, "loss": 1.0382, "step": 1912 }, { "epoch": 2.27, "learning_rate": 0.00017527519614184316, "loss": 1.068, "step": 1913 }, { "epoch": 2.27, "learning_rate": 0.00017525033351900268, "loss": 0.8687, "step": 1914 }, { "epoch": 2.27, "learning_rate": 0.0001752254601671061, "loss": 1.1174, "step": 1915 }, { "epoch": 2.27, "learning_rate": 0.0001752005760896998, "loss": 1.269, "step": 1916 }, { "epoch": 2.27, "learning_rate": 0.0001751756812903317, "loss": 0.7387, "step": 1917 }, { "epoch": 2.27, "learning_rate": 0.0001751507757725513, "loss": 0.8484, "step": 1918 }, { "epoch": 2.28, "learning_rate": 0.0001751258595399095, "loss": 1.0092, "step": 1919 }, { "epoch": 2.28, "learning_rate": 0.00017510093259595885, "loss": 1.0145, "step": 1920 }, { "epoch": 2.28, "learning_rate": 0.00017507599494425344, "loss": 1.2969, "step": 1921 }, { "epoch": 2.28, "learning_rate": 0.00017505104658834875, "loss": 0.7925, "step": 1922 }, { "epoch": 2.28, "learning_rate": 0.00017502608753180196, "loss": 0.8974, "step": 1923 }, { "epoch": 2.28, "learning_rate": 0.00017500111777817164, "loss": 0.764, "step": 1924 }, { "epoch": 2.28, "learning_rate": 0.0001749761373310179, "loss": 1.1057, "step": 1925 }, { "epoch": 2.28, "learning_rate": 0.00017495114619390246, "loss": 0.8092, "step": 1926 }, { "epoch": 2.29, "learning_rate": 0.00017492614437038845, "loss": 0.9553, "step": 1927 }, { "epoch": 2.29, "learning_rate": 0.00017490113186404067, "loss": 1.0278, "step": 1928 }, { "epoch": 2.29, "learning_rate": 0.0001748761086784253, "loss": 1.2152, "step": 1929 }, { "epoch": 2.29, "learning_rate": 0.00017485107481711012, "loss": 1.5154, "step": 1930 }, { "epoch": 2.29, "learning_rate": 0.0001748260302836644, "loss": 1.1973, "step": 1931 }, { "epoch": 2.29, "learning_rate": 0.00017480097508165896, "loss": 0.9429, "step": 1932 }, { "epoch": 2.29, "learning_rate": 0.0001747759092146661, "loss": 1.5453, "step": 1933 }, { "epoch": 2.29, "learning_rate": 0.0001747508326862597, "loss": 1.1691, "step": 1934 }, { "epoch": 2.3, "learning_rate": 0.00017472574550001508, "loss": 1.2094, "step": 1935 }, { "epoch": 2.3, "learning_rate": 0.00017470064765950918, "loss": 1.0777, "step": 1936 }, { "epoch": 2.3, "learning_rate": 0.00017467553916832035, "loss": 1.0883, "step": 1937 }, { "epoch": 2.3, "learning_rate": 0.00017465042003002857, "loss": 0.9297, "step": 1938 }, { "epoch": 2.3, "learning_rate": 0.00017462529024821522, "loss": 0.7814, "step": 1939 }, { "epoch": 2.3, "learning_rate": 0.00017460014982646334, "loss": 1.3645, "step": 1940 }, { "epoch": 2.3, "learning_rate": 0.0001745749987683573, "loss": 1.0604, "step": 1941 }, { "epoch": 2.3, "learning_rate": 0.00017454983707748317, "loss": 0.9416, "step": 1942 }, { "epoch": 2.3, "learning_rate": 0.00017452466475742845, "loss": 1.4187, "step": 1943 }, { "epoch": 2.31, "learning_rate": 0.00017449948181178215, "loss": 1.1619, "step": 1944 }, { "epoch": 2.31, "learning_rate": 0.00017447428824413482, "loss": 1.1381, "step": 1945 }, { "epoch": 2.31, "learning_rate": 0.00017444908405807845, "loss": 1.2304, "step": 1946 }, { "epoch": 2.31, "learning_rate": 0.0001744238692572067, "loss": 1.2149, "step": 1947 }, { "epoch": 2.31, "learning_rate": 0.00017439864384511463, "loss": 0.8172, "step": 1948 }, { "epoch": 2.31, "learning_rate": 0.00017437340782539877, "loss": 1.0783, "step": 1949 }, { "epoch": 2.31, "learning_rate": 0.00017434816120165728, "loss": 1.0661, "step": 1950 }, { "epoch": 2.31, "learning_rate": 0.00017432290397748982, "loss": 1.1959, "step": 1951 }, { "epoch": 2.32, "learning_rate": 0.0001742976361564974, "loss": 1.0581, "step": 1952 }, { "epoch": 2.32, "learning_rate": 0.00017427235774228274, "loss": 0.8948, "step": 1953 }, { "epoch": 2.32, "learning_rate": 0.00017424706873845, "loss": 1.2565, "step": 1954 }, { "epoch": 2.32, "learning_rate": 0.00017422176914860476, "loss": 0.9237, "step": 1955 }, { "epoch": 2.32, "learning_rate": 0.00017419645897635432, "loss": 1.219, "step": 1956 }, { "epoch": 2.32, "learning_rate": 0.00017417113822530727, "loss": 1.4606, "step": 1957 }, { "epoch": 2.32, "learning_rate": 0.00017414580689907377, "loss": 0.714, "step": 1958 }, { "epoch": 2.32, "learning_rate": 0.0001741204650012656, "loss": 1.2223, "step": 1959 }, { "epoch": 2.33, "learning_rate": 0.00017409511253549593, "loss": 0.9828, "step": 1960 }, { "epoch": 2.33, "learning_rate": 0.00017406974950537942, "loss": 0.9954, "step": 1961 }, { "epoch": 2.33, "learning_rate": 0.00017404437591453235, "loss": 1.0307, "step": 1962 }, { "epoch": 2.33, "learning_rate": 0.0001740189917665724, "loss": 0.9331, "step": 1963 }, { "epoch": 2.33, "learning_rate": 0.0001739935970651188, "loss": 1.3517, "step": 1964 }, { "epoch": 2.33, "learning_rate": 0.00017396819181379232, "loss": 1.2024, "step": 1965 }, { "epoch": 2.33, "learning_rate": 0.0001739427760162151, "loss": 0.9696, "step": 1966 }, { "epoch": 2.33, "learning_rate": 0.00017391734967601102, "loss": 1.1559, "step": 1967 }, { "epoch": 2.33, "learning_rate": 0.0001738919127968052, "loss": 1.3104, "step": 1968 }, { "epoch": 2.34, "learning_rate": 0.00017386646538222443, "loss": 0.9073, "step": 1969 }, { "epoch": 2.34, "learning_rate": 0.00017384100743589697, "loss": 1.0539, "step": 1970 }, { "epoch": 2.34, "learning_rate": 0.00017381553896145255, "loss": 0.9873, "step": 1971 }, { "epoch": 2.34, "learning_rate": 0.0001737900599625224, "loss": 0.9466, "step": 1972 }, { "epoch": 2.34, "learning_rate": 0.0001737645704427393, "loss": 1.0639, "step": 1973 }, { "epoch": 2.34, "learning_rate": 0.0001737390704057375, "loss": 0.5843, "step": 1974 }, { "epoch": 2.34, "learning_rate": 0.00017371355985515275, "loss": 1.1318, "step": 1975 }, { "epoch": 2.34, "learning_rate": 0.00017368803879462227, "loss": 1.0116, "step": 1976 }, { "epoch": 2.35, "learning_rate": 0.0001736625072277848, "loss": 0.8845, "step": 1977 }, { "epoch": 2.35, "learning_rate": 0.00017363696515828062, "loss": 0.8081, "step": 1978 }, { "epoch": 2.35, "learning_rate": 0.00017361141258975148, "loss": 0.8795, "step": 1979 }, { "epoch": 2.35, "learning_rate": 0.0001735858495258406, "loss": 0.9725, "step": 1980 }, { "epoch": 2.35, "learning_rate": 0.0001735602759701927, "loss": 1.0164, "step": 1981 }, { "epoch": 2.35, "learning_rate": 0.00017353469192645405, "loss": 1.2937, "step": 1982 }, { "epoch": 2.35, "learning_rate": 0.0001735090973982723, "loss": 1.0842, "step": 1983 }, { "epoch": 2.35, "learning_rate": 0.00017348349238929678, "loss": 1.0043, "step": 1984 }, { "epoch": 2.36, "learning_rate": 0.00017345787690317815, "loss": 1.1302, "step": 1985 }, { "epoch": 2.36, "learning_rate": 0.00017343225094356855, "loss": 1.195, "step": 1986 }, { "epoch": 2.36, "learning_rate": 0.00017340661451412183, "loss": 1.1449, "step": 1987 }, { "epoch": 2.36, "learning_rate": 0.00017338096761849309, "loss": 1.2244, "step": 1988 }, { "epoch": 2.36, "learning_rate": 0.00017335531026033897, "loss": 0.9273, "step": 1989 }, { "epoch": 2.36, "learning_rate": 0.00017332964244331776, "loss": 1.0448, "step": 1990 }, { "epoch": 2.36, "learning_rate": 0.00017330396417108908, "loss": 1.0074, "step": 1991 }, { "epoch": 2.36, "learning_rate": 0.00017327827544731412, "loss": 0.9284, "step": 1992 }, { "epoch": 2.36, "learning_rate": 0.0001732525762756555, "loss": 1.0307, "step": 1993 }, { "epoch": 2.37, "learning_rate": 0.00017322686665977737, "loss": 1.1526, "step": 1994 }, { "epoch": 2.37, "learning_rate": 0.00017320114660334535, "loss": 0.819, "step": 1995 }, { "epoch": 2.37, "learning_rate": 0.00017317541611002656, "loss": 1.1029, "step": 1996 }, { "epoch": 2.37, "learning_rate": 0.00017314967518348962, "loss": 1.2471, "step": 1997 }, { "epoch": 2.37, "learning_rate": 0.00017312392382740462, "loss": 1.0156, "step": 1998 }, { "epoch": 2.37, "learning_rate": 0.00017309816204544317, "loss": 1.1843, "step": 1999 }, { "epoch": 2.37, "learning_rate": 0.00017307238984127832, "loss": 1.1588, "step": 2000 }, { "epoch": 2.37, "learning_rate": 0.00017304660721858457, "loss": 1.0157, "step": 2001 }, { "epoch": 2.38, "learning_rate": 0.000173020814181038, "loss": 1.0563, "step": 2002 }, { "epoch": 2.38, "learning_rate": 0.00017299501073231622, "loss": 1.1883, "step": 2003 }, { "epoch": 2.38, "learning_rate": 0.00017296919687609808, "loss": 0.9404, "step": 2004 }, { "epoch": 2.38, "learning_rate": 0.00017294337261606417, "loss": 1.2495, "step": 2005 }, { "epoch": 2.38, "learning_rate": 0.00017291753795589643, "loss": 1.0074, "step": 2006 }, { "epoch": 2.38, "learning_rate": 0.00017289169289927837, "loss": 1.1411, "step": 2007 }, { "epoch": 2.38, "learning_rate": 0.00017286583744989488, "loss": 0.9942, "step": 2008 }, { "epoch": 2.38, "learning_rate": 0.00017283997161143239, "loss": 0.952, "step": 2009 }, { "epoch": 2.39, "learning_rate": 0.00017281409538757883, "loss": 1.2966, "step": 2010 }, { "epoch": 2.39, "learning_rate": 0.00017278820878202357, "loss": 1.0836, "step": 2011 }, { "epoch": 2.39, "learning_rate": 0.0001727623117984575, "loss": 1.0984, "step": 2012 }, { "epoch": 2.39, "learning_rate": 0.0001727364044405729, "loss": 0.8822, "step": 2013 }, { "epoch": 2.39, "learning_rate": 0.00017271048671206366, "loss": 1.2014, "step": 2014 }, { "epoch": 2.39, "learning_rate": 0.00017268455861662503, "loss": 1.1779, "step": 2015 }, { "epoch": 2.39, "learning_rate": 0.00017265862015795384, "loss": 0.9966, "step": 2016 }, { "epoch": 2.39, "learning_rate": 0.00017263267133974832, "loss": 0.9536, "step": 2017 }, { "epoch": 2.39, "learning_rate": 0.00017260671216570822, "loss": 0.811, "step": 2018 }, { "epoch": 2.4, "learning_rate": 0.00017258074263953472, "loss": 0.8241, "step": 2019 }, { "epoch": 2.4, "learning_rate": 0.00017255476276493056, "loss": 1.1263, "step": 2020 }, { "epoch": 2.4, "learning_rate": 0.00017252877254559986, "loss": 0.995, "step": 2021 }, { "epoch": 2.4, "learning_rate": 0.0001725027719852483, "loss": 1.1481, "step": 2022 }, { "epoch": 2.4, "learning_rate": 0.0001724767610875829, "loss": 1.129, "step": 2023 }, { "epoch": 2.4, "learning_rate": 0.00017245073985631238, "loss": 0.5928, "step": 2024 }, { "epoch": 2.4, "learning_rate": 0.00017242470829514672, "loss": 0.8326, "step": 2025 }, { "epoch": 2.4, "learning_rate": 0.00017239866640779745, "loss": 1.1092, "step": 2026 }, { "epoch": 2.41, "learning_rate": 0.00017237261419797756, "loss": 1.5015, "step": 2027 }, { "epoch": 2.41, "learning_rate": 0.0001723465516694016, "loss": 0.9775, "step": 2028 }, { "epoch": 2.41, "learning_rate": 0.00017232047882578548, "loss": 0.9348, "step": 2029 }, { "epoch": 2.41, "learning_rate": 0.0001722943956708466, "loss": 0.6199, "step": 2030 }, { "epoch": 2.41, "learning_rate": 0.00017226830220830384, "loss": 1.1485, "step": 2031 }, { "epoch": 2.41, "learning_rate": 0.00017224219844187764, "loss": 1.1195, "step": 2032 }, { "epoch": 2.41, "learning_rate": 0.00017221608437528973, "loss": 1.0528, "step": 2033 }, { "epoch": 2.41, "learning_rate": 0.00017218996001226345, "loss": 1.1058, "step": 2034 }, { "epoch": 2.42, "learning_rate": 0.00017216382535652355, "loss": 1.1451, "step": 2035 }, { "epoch": 2.42, "learning_rate": 0.0001721376804117963, "loss": 1.2251, "step": 2036 }, { "epoch": 2.42, "learning_rate": 0.00017211152518180936, "loss": 1.0708, "step": 2037 }, { "epoch": 2.42, "learning_rate": 0.00017208535967029188, "loss": 1.0746, "step": 2038 }, { "epoch": 2.42, "learning_rate": 0.00017205918388097456, "loss": 1.3262, "step": 2039 }, { "epoch": 2.42, "learning_rate": 0.00017203299781758943, "loss": 0.7619, "step": 2040 }, { "epoch": 2.42, "learning_rate": 0.00017200680148387007, "loss": 1.01, "step": 2041 }, { "epoch": 2.42, "learning_rate": 0.0001719805948835515, "loss": 1.1651, "step": 2042 }, { "epoch": 2.42, "learning_rate": 0.00017195437802037026, "loss": 1.4671, "step": 2043 }, { "epoch": 2.43, "learning_rate": 0.00017192815089806424, "loss": 0.9857, "step": 2044 }, { "epoch": 2.43, "learning_rate": 0.0001719019135203729, "loss": 1.2613, "step": 2045 }, { "epoch": 2.43, "learning_rate": 0.00017187566589103704, "loss": 1.4386, "step": 2046 }, { "epoch": 2.43, "learning_rate": 0.0001718494080137991, "loss": 1.0965, "step": 2047 }, { "epoch": 2.43, "learning_rate": 0.00017182313989240285, "loss": 0.752, "step": 2048 }, { "epoch": 2.43, "learning_rate": 0.00017179686153059352, "loss": 0.9126, "step": 2049 }, { "epoch": 2.43, "learning_rate": 0.00017177057293211784, "loss": 1.5075, "step": 2050 }, { "epoch": 2.43, "learning_rate": 0.000171744274100724, "loss": 1.0407, "step": 2051 }, { "epoch": 2.44, "learning_rate": 0.00017171796504016166, "loss": 0.8263, "step": 2052 }, { "epoch": 2.44, "learning_rate": 0.0001716916457541819, "loss": 0.9453, "step": 2053 }, { "epoch": 2.44, "learning_rate": 0.00017166531624653722, "loss": 0.9777, "step": 2054 }, { "epoch": 2.44, "learning_rate": 0.00017163897652098172, "loss": 1.2129, "step": 2055 }, { "epoch": 2.44, "learning_rate": 0.00017161262658127086, "loss": 1.3642, "step": 2056 }, { "epoch": 2.44, "learning_rate": 0.00017158626643116152, "loss": 0.6798, "step": 2057 }, { "epoch": 2.44, "learning_rate": 0.00017155989607441213, "loss": 0.874, "step": 2058 }, { "epoch": 2.44, "learning_rate": 0.00017153351551478247, "loss": 1.0636, "step": 2059 }, { "epoch": 2.45, "learning_rate": 0.0001715071247560339, "loss": 1.0563, "step": 2060 }, { "epoch": 2.45, "learning_rate": 0.0001714807238019291, "loss": 1.1984, "step": 2061 }, { "epoch": 2.45, "learning_rate": 0.00017145431265623234, "loss": 0.9444, "step": 2062 }, { "epoch": 2.45, "learning_rate": 0.0001714278913227092, "loss": 0.7809, "step": 2063 }, { "epoch": 2.45, "learning_rate": 0.00017140145980512684, "loss": 1.649, "step": 2064 }, { "epoch": 2.45, "learning_rate": 0.0001713750181072538, "loss": 1.0956, "step": 2065 }, { "epoch": 2.45, "learning_rate": 0.0001713485662328601, "loss": 1.2845, "step": 2066 }, { "epoch": 2.45, "learning_rate": 0.00017132210418571714, "loss": 1.0484, "step": 2067 }, { "epoch": 2.45, "learning_rate": 0.00017129563196959793, "loss": 1.0291, "step": 2068 }, { "epoch": 2.46, "learning_rate": 0.00017126914958827679, "loss": 1.1226, "step": 2069 }, { "epoch": 2.46, "learning_rate": 0.0001712426570455295, "loss": 1.0119, "step": 2070 }, { "epoch": 2.46, "learning_rate": 0.00017121615434513332, "loss": 1.1663, "step": 2071 }, { "epoch": 2.46, "learning_rate": 0.000171189641490867, "loss": 1.1353, "step": 2072 }, { "epoch": 2.46, "learning_rate": 0.00017116311848651064, "loss": 1.0761, "step": 2073 }, { "epoch": 2.46, "learning_rate": 0.00017113658533584594, "loss": 1.1978, "step": 2074 }, { "epoch": 2.46, "learning_rate": 0.00017111004204265582, "loss": 1.3881, "step": 2075 }, { "epoch": 2.46, "learning_rate": 0.00017108348861072484, "loss": 1.3945, "step": 2076 }, { "epoch": 2.47, "learning_rate": 0.00017105692504383897, "loss": 1.3796, "step": 2077 }, { "epoch": 2.47, "learning_rate": 0.00017103035134578555, "loss": 1.1721, "step": 2078 }, { "epoch": 2.47, "learning_rate": 0.0001710037675203534, "loss": 1.0061, "step": 2079 }, { "epoch": 2.47, "learning_rate": 0.00017097717357133284, "loss": 1.2456, "step": 2080 }, { "epoch": 2.47, "learning_rate": 0.00017095056950251555, "loss": 0.788, "step": 2081 }, { "epoch": 2.47, "learning_rate": 0.0001709239553176947, "loss": 1.16, "step": 2082 }, { "epoch": 2.47, "learning_rate": 0.0001708973310206649, "loss": 1.0498, "step": 2083 }, { "epoch": 2.47, "learning_rate": 0.00017087069661522218, "loss": 0.8993, "step": 2084 }, { "epoch": 2.48, "learning_rate": 0.00017084405210516406, "loss": 1.2088, "step": 2085 }, { "epoch": 2.48, "learning_rate": 0.0001708173974942894, "loss": 1.0897, "step": 2086 }, { "epoch": 2.48, "learning_rate": 0.00017079073278639863, "loss": 1.2718, "step": 2087 }, { "epoch": 2.48, "learning_rate": 0.00017076405798529355, "loss": 1.2325, "step": 2088 }, { "epoch": 2.48, "learning_rate": 0.00017073737309477736, "loss": 1.0555, "step": 2089 }, { "epoch": 2.48, "learning_rate": 0.00017071067811865476, "loss": 1.1428, "step": 2090 }, { "epoch": 2.48, "eval_loss": 2.3191208839416504, "eval_runtime": 284.1375, "eval_samples_per_second": 0.725, "eval_steps_per_second": 0.725, "step": 2090 }, { "epoch": 2.48, "learning_rate": 0.0001706839730607319, "loss": 1.0908, "step": 2091 }, { "epoch": 2.48, "learning_rate": 0.0001706572579248163, "loss": 1.2092, "step": 2092 }, { "epoch": 2.48, "learning_rate": 0.000170630532714717, "loss": 1.1735, "step": 2093 }, { "epoch": 2.49, "learning_rate": 0.0001706037974342444, "loss": 1.2716, "step": 2094 }, { "epoch": 2.49, "learning_rate": 0.00017057705208721035, "loss": 1.0095, "step": 2095 }, { "epoch": 2.49, "learning_rate": 0.0001705502966774282, "loss": 1.3059, "step": 2096 }, { "epoch": 2.49, "learning_rate": 0.00017052353120871266, "loss": 0.8269, "step": 2097 }, { "epoch": 2.49, "learning_rate": 0.0001704967556848799, "loss": 1.0615, "step": 2098 }, { "epoch": 2.49, "learning_rate": 0.00017046997010974755, "loss": 1.2709, "step": 2099 }, { "epoch": 2.49, "learning_rate": 0.00017044317448713461, "loss": 1.1633, "step": 2100 }, { "epoch": 2.49, "learning_rate": 0.00017041636882086158, "loss": 0.9273, "step": 2101 }, { "epoch": 2.5, "learning_rate": 0.00017038955311475038, "loss": 1.3117, "step": 2102 }, { "epoch": 2.5, "learning_rate": 0.0001703627273726243, "loss": 0.8883, "step": 2103 }, { "epoch": 2.5, "learning_rate": 0.00017033589159830815, "loss": 1.1371, "step": 2104 }, { "epoch": 2.5, "learning_rate": 0.00017030904579562806, "loss": 1.5402, "step": 2105 }, { "epoch": 2.5, "learning_rate": 0.00017028218996841172, "loss": 0.9156, "step": 2106 }, { "epoch": 2.5, "learning_rate": 0.00017025532412048817, "loss": 1.0962, "step": 2107 }, { "epoch": 2.5, "learning_rate": 0.0001702284482556879, "loss": 0.9402, "step": 2108 }, { "epoch": 2.5, "learning_rate": 0.00017020156237784279, "loss": 0.8146, "step": 2109 }, { "epoch": 2.51, "learning_rate": 0.0001701746664907862, "loss": 1.1718, "step": 2110 }, { "epoch": 2.51, "learning_rate": 0.00017014776059835288, "loss": 1.0618, "step": 2111 }, { "epoch": 2.51, "learning_rate": 0.00017012084470437907, "loss": 1.4796, "step": 2112 }, { "epoch": 2.51, "learning_rate": 0.00017009391881270237, "loss": 0.8402, "step": 2113 }, { "epoch": 2.51, "learning_rate": 0.00017006698292716178, "loss": 1.1641, "step": 2114 }, { "epoch": 2.51, "learning_rate": 0.0001700400370515978, "loss": 1.241, "step": 2115 }, { "epoch": 2.51, "learning_rate": 0.00017001308118985237, "loss": 0.8683, "step": 2116 }, { "epoch": 2.51, "learning_rate": 0.00016998611534576873, "loss": 1.2697, "step": 2117 }, { "epoch": 2.52, "learning_rate": 0.00016995913952319168, "loss": 0.9233, "step": 2118 }, { "epoch": 2.52, "learning_rate": 0.00016993215372596737, "loss": 1.2472, "step": 2119 }, { "epoch": 2.52, "learning_rate": 0.00016990515795794334, "loss": 1.2541, "step": 2120 }, { "epoch": 2.52, "learning_rate": 0.00016987815222296865, "loss": 1.0016, "step": 2121 }, { "epoch": 2.52, "learning_rate": 0.00016985113652489374, "loss": 1.0678, "step": 2122 }, { "epoch": 2.52, "learning_rate": 0.00016982411086757037, "loss": 1.6066, "step": 2123 }, { "epoch": 2.52, "learning_rate": 0.00016979707525485192, "loss": 1.229, "step": 2124 }, { "epoch": 2.52, "learning_rate": 0.00016977002969059302, "loss": 0.752, "step": 2125 }, { "epoch": 2.52, "learning_rate": 0.00016974297417864977, "loss": 0.8752, "step": 2126 }, { "epoch": 2.53, "learning_rate": 0.0001697159087228797, "loss": 0.8896, "step": 2127 }, { "epoch": 2.53, "learning_rate": 0.00016968883332714186, "loss": 0.9657, "step": 2128 }, { "epoch": 2.53, "learning_rate": 0.0001696617479952964, "loss": 1.3657, "step": 2129 }, { "epoch": 2.53, "learning_rate": 0.0001696346527312053, "loss": 0.9876, "step": 2130 }, { "epoch": 2.53, "learning_rate": 0.00016960754753873162, "loss": 1.0165, "step": 2131 }, { "epoch": 2.53, "learning_rate": 0.00016958043242174003, "loss": 1.625, "step": 2132 }, { "epoch": 2.53, "learning_rate": 0.00016955330738409655, "loss": 1.5502, "step": 2133 }, { "epoch": 2.53, "learning_rate": 0.00016952617242966864, "loss": 1.0793, "step": 2134 }, { "epoch": 2.54, "learning_rate": 0.00016949902756232507, "loss": 1.4425, "step": 2135 }, { "epoch": 2.54, "learning_rate": 0.00016947187278593622, "loss": 1.3124, "step": 2136 }, { "epoch": 2.54, "learning_rate": 0.00016944470810437365, "loss": 0.927, "step": 2137 }, { "epoch": 2.54, "learning_rate": 0.00016941753352151055, "loss": 1.1911, "step": 2138 }, { "epoch": 2.54, "learning_rate": 0.00016939034904122138, "loss": 1.0768, "step": 2139 }, { "epoch": 2.54, "learning_rate": 0.00016936315466738205, "loss": 1.1277, "step": 2140 }, { "epoch": 2.54, "learning_rate": 0.00016933595040386984, "loss": 0.812, "step": 2141 }, { "epoch": 2.54, "learning_rate": 0.0001693087362545636, "loss": 0.8299, "step": 2142 }, { "epoch": 2.55, "learning_rate": 0.00016928151222334338, "loss": 1.1125, "step": 2143 }, { "epoch": 2.55, "learning_rate": 0.00016925427831409077, "loss": 1.1835, "step": 2144 }, { "epoch": 2.55, "learning_rate": 0.00016922703453068873, "loss": 1.2007, "step": 2145 }, { "epoch": 2.55, "learning_rate": 0.00016919978087702163, "loss": 0.8524, "step": 2146 }, { "epoch": 2.55, "learning_rate": 0.00016917251735697523, "loss": 0.9497, "step": 2147 }, { "epoch": 2.55, "learning_rate": 0.00016914524397443673, "loss": 1.1004, "step": 2148 }, { "epoch": 2.55, "learning_rate": 0.00016911796073329466, "loss": 0.8347, "step": 2149 }, { "epoch": 2.55, "learning_rate": 0.00016909066763743912, "loss": 0.9492, "step": 2150 }, { "epoch": 2.55, "learning_rate": 0.00016906336469076148, "loss": 1.1406, "step": 2151 }, { "epoch": 2.56, "learning_rate": 0.00016903605189715447, "loss": 1.0137, "step": 2152 }, { "epoch": 2.56, "learning_rate": 0.0001690087292605124, "loss": 1.0624, "step": 2153 }, { "epoch": 2.56, "learning_rate": 0.00016898139678473076, "loss": 1.1767, "step": 2154 }, { "epoch": 2.56, "learning_rate": 0.0001689540544737067, "loss": 1.4184, "step": 2155 }, { "epoch": 2.56, "learning_rate": 0.00016892670233133856, "loss": 0.957, "step": 2156 }, { "epoch": 2.56, "learning_rate": 0.00016889934036152618, "loss": 1.0399, "step": 2157 }, { "epoch": 2.56, "learning_rate": 0.00016887196856817073, "loss": 1.2009, "step": 2158 }, { "epoch": 2.56, "learning_rate": 0.00016884458695517495, "loss": 1.3977, "step": 2159 }, { "epoch": 2.57, "learning_rate": 0.00016881719552644273, "loss": 1.1328, "step": 2160 }, { "epoch": 2.57, "learning_rate": 0.00016878979428587955, "loss": 1.5007, "step": 2161 }, { "epoch": 2.57, "learning_rate": 0.00016876238323739221, "loss": 1.1248, "step": 2162 }, { "epoch": 2.57, "learning_rate": 0.00016873496238488899, "loss": 1.0358, "step": 2163 }, { "epoch": 2.57, "learning_rate": 0.00016870753173227945, "loss": 1.2961, "step": 2164 }, { "epoch": 2.57, "learning_rate": 0.00016868009128347459, "loss": 0.9435, "step": 2165 }, { "epoch": 2.57, "learning_rate": 0.00016865264104238683, "loss": 0.9642, "step": 2166 }, { "epoch": 2.57, "learning_rate": 0.00016862518101293, "loss": 1.0169, "step": 2167 }, { "epoch": 2.58, "learning_rate": 0.00016859771119901929, "loss": 1.0904, "step": 2168 }, { "epoch": 2.58, "learning_rate": 0.0001685702316045713, "loss": 1.3178, "step": 2169 }, { "epoch": 2.58, "learning_rate": 0.00016854274223350397, "loss": 1.1395, "step": 2170 }, { "epoch": 2.58, "learning_rate": 0.00016851524308973678, "loss": 1.1207, "step": 2171 }, { "epoch": 2.58, "learning_rate": 0.00016848773417719044, "loss": 1.3544, "step": 2172 }, { "epoch": 2.58, "learning_rate": 0.00016846021549978715, "loss": 1.3503, "step": 2173 }, { "epoch": 2.58, "learning_rate": 0.00016843268706145042, "loss": 1.4276, "step": 2174 }, { "epoch": 2.58, "learning_rate": 0.00016840514886610529, "loss": 0.9888, "step": 2175 }, { "epoch": 2.58, "learning_rate": 0.00016837760091767802, "loss": 1.0913, "step": 2176 }, { "epoch": 2.59, "learning_rate": 0.0001683500432200964, "loss": 1.4781, "step": 2177 }, { "epoch": 2.59, "learning_rate": 0.00016832247577728955, "loss": 1.2657, "step": 2178 }, { "epoch": 2.59, "learning_rate": 0.000168294898593188, "loss": 0.9206, "step": 2179 }, { "epoch": 2.59, "learning_rate": 0.0001682673116717236, "loss": 0.9218, "step": 2180 }, { "epoch": 2.59, "learning_rate": 0.0001682397150168297, "loss": 1.2719, "step": 2181 }, { "epoch": 2.59, "learning_rate": 0.00016821210863244096, "loss": 0.984, "step": 2182 }, { "epoch": 2.59, "learning_rate": 0.00016818449252249345, "loss": 1.4641, "step": 2183 }, { "epoch": 2.59, "learning_rate": 0.0001681568666909246, "loss": 1.2571, "step": 2184 }, { "epoch": 2.6, "learning_rate": 0.00016812923114167328, "loss": 1.2025, "step": 2185 }, { "epoch": 2.6, "learning_rate": 0.00016810158587867973, "loss": 0.9621, "step": 2186 }, { "epoch": 2.6, "learning_rate": 0.00016807393090588553, "loss": 1.0016, "step": 2187 }, { "epoch": 2.6, "learning_rate": 0.00016804626622723368, "loss": 1.031, "step": 2188 }, { "epoch": 2.6, "learning_rate": 0.00016801859184666857, "loss": 0.7573, "step": 2189 }, { "epoch": 2.6, "learning_rate": 0.00016799090776813597, "loss": 1.2694, "step": 2190 }, { "epoch": 2.6, "learning_rate": 0.000167963213995583, "loss": 1.196, "step": 2191 }, { "epoch": 2.6, "learning_rate": 0.00016793551053295822, "loss": 0.8754, "step": 2192 }, { "epoch": 2.61, "learning_rate": 0.00016790779738421152, "loss": 1.1743, "step": 2193 }, { "epoch": 2.61, "learning_rate": 0.0001678800745532942, "loss": 1.0921, "step": 2194 }, { "epoch": 2.61, "learning_rate": 0.00016785234204415888, "loss": 0.8778, "step": 2195 }, { "epoch": 2.61, "learning_rate": 0.0001678245998607597, "loss": 1.0528, "step": 2196 }, { "epoch": 2.61, "learning_rate": 0.00016779684800705203, "loss": 1.0255, "step": 2197 }, { "epoch": 2.61, "learning_rate": 0.0001677690864869927, "loss": 0.6344, "step": 2198 }, { "epoch": 2.61, "learning_rate": 0.00016774131530453992, "loss": 0.8691, "step": 2199 }, { "epoch": 2.61, "learning_rate": 0.00016771353446365318, "loss": 1.2061, "step": 2200 }, { "epoch": 2.61, "learning_rate": 0.0001676857439682935, "loss": 1.1759, "step": 2201 }, { "epoch": 2.62, "learning_rate": 0.00016765794382242314, "loss": 1.1118, "step": 2202 }, { "epoch": 2.62, "learning_rate": 0.00016763013403000584, "loss": 1.3005, "step": 2203 }, { "epoch": 2.62, "learning_rate": 0.00016760231459500666, "loss": 1.0415, "step": 2204 }, { "epoch": 2.62, "learning_rate": 0.000167574485521392, "loss": 0.824, "step": 2205 }, { "epoch": 2.62, "learning_rate": 0.00016754664681312975, "loss": 0.6682, "step": 2206 }, { "epoch": 2.62, "learning_rate": 0.00016751879847418905, "loss": 1.9204, "step": 2207 }, { "epoch": 2.62, "learning_rate": 0.00016749094050854047, "loss": 0.9931, "step": 2208 }, { "epoch": 2.62, "learning_rate": 0.00016746307292015602, "loss": 0.8898, "step": 2209 }, { "epoch": 2.63, "learning_rate": 0.00016743519571300888, "loss": 1.3337, "step": 2210 }, { "epoch": 2.63, "learning_rate": 0.00016740730889107383, "loss": 1.2947, "step": 2211 }, { "epoch": 2.63, "learning_rate": 0.0001673794124583269, "loss": 1.1882, "step": 2212 }, { "epoch": 2.63, "learning_rate": 0.0001673515064187455, "loss": 1.5408, "step": 2213 }, { "epoch": 2.63, "learning_rate": 0.00016732359077630847, "loss": 1.1273, "step": 2214 }, { "epoch": 2.63, "learning_rate": 0.0001672956655349959, "loss": 0.8954, "step": 2215 }, { "epoch": 2.63, "learning_rate": 0.00016726773069878934, "loss": 1.1747, "step": 2216 }, { "epoch": 2.63, "learning_rate": 0.00016723978627167173, "loss": 0.807, "step": 2217 }, { "epoch": 2.64, "learning_rate": 0.00016721183225762727, "loss": 1.2512, "step": 2218 }, { "epoch": 2.64, "learning_rate": 0.00016718386866064166, "loss": 1.0796, "step": 2219 }, { "epoch": 2.64, "learning_rate": 0.00016715589548470185, "loss": 1.0905, "step": 2220 }, { "epoch": 2.64, "learning_rate": 0.00016712791273379622, "loss": 1.3779, "step": 2221 }, { "epoch": 2.64, "learning_rate": 0.00016709992041191452, "loss": 1.2015, "step": 2222 }, { "epoch": 2.64, "learning_rate": 0.00016707191852304782, "loss": 0.8612, "step": 2223 }, { "epoch": 2.64, "learning_rate": 0.0001670439070711886, "loss": 1.1819, "step": 2224 }, { "epoch": 2.64, "learning_rate": 0.00016701588606033064, "loss": 1.2715, "step": 2225 }, { "epoch": 2.64, "learning_rate": 0.0001669878554944692, "loss": 1.3681, "step": 2226 }, { "epoch": 2.65, "learning_rate": 0.00016695981537760072, "loss": 1.1254, "step": 2227 }, { "epoch": 2.65, "learning_rate": 0.0001669317657137232, "loss": 0.9476, "step": 2228 }, { "epoch": 2.65, "learning_rate": 0.0001669037065068359, "loss": 1.235, "step": 2229 }, { "epoch": 2.65, "learning_rate": 0.00016687563776093941, "loss": 0.7356, "step": 2230 }, { "epoch": 2.65, "learning_rate": 0.00016684755948003573, "loss": 0.7901, "step": 2231 }, { "epoch": 2.65, "learning_rate": 0.00016681947166812824, "loss": 1.317, "step": 2232 }, { "epoch": 2.65, "learning_rate": 0.00016679137432922163, "loss": 0.8832, "step": 2233 }, { "epoch": 2.65, "learning_rate": 0.00016676326746732195, "loss": 1.2776, "step": 2234 }, { "epoch": 2.66, "learning_rate": 0.00016673515108643665, "loss": 1.0435, "step": 2235 }, { "epoch": 2.66, "learning_rate": 0.0001667070251905745, "loss": 1.0957, "step": 2236 }, { "epoch": 2.66, "learning_rate": 0.00016667888978374567, "loss": 1.0862, "step": 2237 }, { "epoch": 2.66, "learning_rate": 0.00016665074486996165, "loss": 1.1112, "step": 2238 }, { "epoch": 2.66, "learning_rate": 0.0001666225904532352, "loss": 1.3633, "step": 2239 }, { "epoch": 2.66, "learning_rate": 0.00016659442653758064, "loss": 1.444, "step": 2240 }, { "epoch": 2.66, "learning_rate": 0.00016656625312701348, "loss": 0.8248, "step": 2241 }, { "epoch": 2.66, "learning_rate": 0.00016653807022555067, "loss": 1.2522, "step": 2242 }, { "epoch": 2.67, "learning_rate": 0.0001665098778372104, "loss": 1.2107, "step": 2243 }, { "epoch": 2.67, "learning_rate": 0.0001664816759660124, "loss": 1.0813, "step": 2244 }, { "epoch": 2.67, "learning_rate": 0.00016645346461597753, "loss": 1.1136, "step": 2245 }, { "epoch": 2.67, "learning_rate": 0.00016642524379112817, "loss": 1.1003, "step": 2246 }, { "epoch": 2.67, "learning_rate": 0.000166397013495488, "loss": 1.0635, "step": 2247 }, { "epoch": 2.67, "learning_rate": 0.00016636877373308204, "loss": 1.0575, "step": 2248 }, { "epoch": 2.67, "learning_rate": 0.00016634052450793663, "loss": 0.7693, "step": 2249 }, { "epoch": 2.67, "learning_rate": 0.00016631226582407952, "loss": 1.5965, "step": 2250 }, { "epoch": 2.67, "learning_rate": 0.0001662839976855398, "loss": 1.0989, "step": 2251 }, { "epoch": 2.68, "learning_rate": 0.00016625572009634787, "loss": 0.9198, "step": 2252 }, { "epoch": 2.68, "learning_rate": 0.00016622743306053548, "loss": 1.0896, "step": 2253 }, { "epoch": 2.68, "learning_rate": 0.00016619913658213578, "loss": 1.015, "step": 2254 }, { "epoch": 2.68, "learning_rate": 0.0001661708306651832, "loss": 0.8572, "step": 2255 }, { "epoch": 2.68, "learning_rate": 0.00016614251531371353, "loss": 1.1508, "step": 2256 }, { "epoch": 2.68, "learning_rate": 0.000166114190531764, "loss": 1.1852, "step": 2257 }, { "epoch": 2.68, "learning_rate": 0.00016608585632337306, "loss": 0.932, "step": 2258 }, { "epoch": 2.68, "learning_rate": 0.00016605751269258053, "loss": 1.2542, "step": 2259 }, { "epoch": 2.69, "learning_rate": 0.00016602915964342757, "loss": 0.943, "step": 2260 }, { "epoch": 2.69, "learning_rate": 0.00016600079717995678, "loss": 1.2438, "step": 2261 }, { "epoch": 2.69, "learning_rate": 0.00016597242530621203, "loss": 0.9928, "step": 2262 }, { "epoch": 2.69, "learning_rate": 0.00016594404402623845, "loss": 0.9516, "step": 2263 }, { "epoch": 2.69, "learning_rate": 0.00016591565334408265, "loss": 1.1689, "step": 2264 }, { "epoch": 2.69, "learning_rate": 0.0001658872532637925, "loss": 1.3155, "step": 2265 }, { "epoch": 2.69, "learning_rate": 0.00016585884378941725, "loss": 1.1596, "step": 2266 }, { "epoch": 2.69, "learning_rate": 0.00016583042492500746, "loss": 0.9956, "step": 2267 }, { "epoch": 2.7, "learning_rate": 0.00016580199667461508, "loss": 0.9289, "step": 2268 }, { "epoch": 2.7, "learning_rate": 0.00016577355904229325, "loss": 1.3225, "step": 2269 }, { "epoch": 2.7, "learning_rate": 0.00016574511203209667, "loss": 1.0384, "step": 2270 }, { "epoch": 2.7, "learning_rate": 0.0001657166556480812, "loss": 0.697, "step": 2271 }, { "epoch": 2.7, "learning_rate": 0.00016568818989430416, "loss": 0.7702, "step": 2272 }, { "epoch": 2.7, "learning_rate": 0.00016565971477482404, "loss": 1.1041, "step": 2273 }, { "epoch": 2.7, "learning_rate": 0.00016563123029370093, "loss": 1.0462, "step": 2274 }, { "epoch": 2.7, "learning_rate": 0.0001656027364549959, "loss": 1.0797, "step": 2275 }, { "epoch": 2.7, "learning_rate": 0.0001655742332627717, "loss": 1.3301, "step": 2276 }, { "epoch": 2.71, "learning_rate": 0.0001655457207210922, "loss": 1.0467, "step": 2277 }, { "epoch": 2.71, "learning_rate": 0.00016551719883402271, "loss": 0.9432, "step": 2278 }, { "epoch": 2.71, "learning_rate": 0.00016548866760562978, "loss": 1.1808, "step": 2279 }, { "epoch": 2.71, "learning_rate": 0.00016546012703998138, "loss": 1.1094, "step": 2280 }, { "epoch": 2.71, "learning_rate": 0.00016543157714114673, "loss": 1.3914, "step": 2281 }, { "epoch": 2.71, "learning_rate": 0.00016540301791319645, "loss": 1.0402, "step": 2282 }, { "epoch": 2.71, "learning_rate": 0.00016537444936020246, "loss": 0.9815, "step": 2283 }, { "epoch": 2.71, "learning_rate": 0.000165345871486238, "loss": 0.9722, "step": 2284 }, { "epoch": 2.72, "learning_rate": 0.00016531728429537766, "loss": 0.919, "step": 2285 }, { "epoch": 2.72, "learning_rate": 0.00016528868779169738, "loss": 1.1242, "step": 2286 }, { "epoch": 2.72, "learning_rate": 0.00016526008197927436, "loss": 1.1794, "step": 2287 }, { "epoch": 2.72, "learning_rate": 0.00016523146686218718, "loss": 1.434, "step": 2288 }, { "epoch": 2.72, "learning_rate": 0.00016520284244451574, "loss": 0.8463, "step": 2289 }, { "epoch": 2.72, "learning_rate": 0.00016517420873034123, "loss": 1.1736, "step": 2290 }, { "epoch": 2.72, "learning_rate": 0.0001651455657237462, "loss": 1.0431, "step": 2291 }, { "epoch": 2.72, "learning_rate": 0.00016511691342881453, "loss": 1.2796, "step": 2292 }, { "epoch": 2.73, "learning_rate": 0.0001650882518496314, "loss": 1.0578, "step": 2293 }, { "epoch": 2.73, "learning_rate": 0.00016505958099028334, "loss": 1.3914, "step": 2294 }, { "epoch": 2.73, "learning_rate": 0.0001650309008548582, "loss": 1.0046, "step": 2295 }, { "epoch": 2.73, "learning_rate": 0.0001650022114474451, "loss": 1.0246, "step": 2296 }, { "epoch": 2.73, "learning_rate": 0.00016497351277213458, "loss": 1.2789, "step": 2297 }, { "epoch": 2.73, "learning_rate": 0.00016494480483301836, "loss": 1.0036, "step": 2298 }, { "epoch": 2.73, "learning_rate": 0.00016491608763418968, "loss": 0.886, "step": 2299 }, { "epoch": 2.73, "eval_loss": 2.3017475605010986, "eval_runtime": 283.8846, "eval_samples_per_second": 0.726, "eval_steps_per_second": 0.726, "step": 2299 }, { "epoch": 2.73, "learning_rate": 0.0001648873611797429, "loss": 1.3953, "step": 2300 }, { "epoch": 2.73, "learning_rate": 0.0001648586254737738, "loss": 0.6972, "step": 2301 }, { "epoch": 2.74, "learning_rate": 0.00016482988052037947, "loss": 1.2311, "step": 2302 }, { "epoch": 2.74, "learning_rate": 0.00016480112632365833, "loss": 1.327, "step": 2303 }, { "epoch": 2.74, "learning_rate": 0.0001647723628877101, "loss": 0.9534, "step": 2304 }, { "epoch": 2.74, "learning_rate": 0.0001647435902166358, "loss": 0.9164, "step": 2305 }, { "epoch": 2.74, "learning_rate": 0.0001647148083145378, "loss": 1.1038, "step": 2306 }, { "epoch": 2.74, "learning_rate": 0.00016468601718551976, "loss": 1.0444, "step": 2307 }, { "epoch": 2.74, "learning_rate": 0.00016465721683368666, "loss": 1.2635, "step": 2308 }, { "epoch": 2.74, "learning_rate": 0.00016462840726314486, "loss": 1.1647, "step": 2309 }, { "epoch": 2.75, "learning_rate": 0.00016459958847800187, "loss": 1.3617, "step": 2310 }, { "epoch": 2.75, "learning_rate": 0.00016457076048236675, "loss": 1.2355, "step": 2311 }, { "epoch": 2.75, "learning_rate": 0.00016454192328034962, "loss": 0.9989, "step": 2312 }, { "epoch": 2.75, "learning_rate": 0.00016451307687606213, "loss": 1.1218, "step": 2313 }, { "epoch": 2.75, "learning_rate": 0.00016448422127361706, "loss": 0.8967, "step": 2314 }, { "epoch": 2.75, "learning_rate": 0.0001644553564771287, "loss": 1.159, "step": 2315 }, { "epoch": 2.75, "learning_rate": 0.0001644264824907124, "loss": 1.5901, "step": 2316 }, { "epoch": 2.75, "learning_rate": 0.0001643975993184851, "loss": 0.979, "step": 2317 }, { "epoch": 2.76, "learning_rate": 0.00016436870696456482, "loss": 0.8561, "step": 2318 }, { "epoch": 2.76, "learning_rate": 0.00016433980543307107, "loss": 0.9485, "step": 2319 }, { "epoch": 2.76, "learning_rate": 0.00016431089472812444, "loss": 0.7736, "step": 2320 }, { "epoch": 2.76, "learning_rate": 0.00016428197485384707, "loss": 1.2546, "step": 2321 }, { "epoch": 2.76, "learning_rate": 0.00016425304581436226, "loss": 0.9534, "step": 2322 }, { "epoch": 2.76, "learning_rate": 0.0001642241076137947, "loss": 0.8182, "step": 2323 }, { "epoch": 2.76, "learning_rate": 0.0001641951602562703, "loss": 1.1107, "step": 2324 }, { "epoch": 2.76, "learning_rate": 0.0001641662037459164, "loss": 1.0628, "step": 2325 }, { "epoch": 2.76, "learning_rate": 0.00016413723808686147, "loss": 1.6261, "step": 2326 }, { "epoch": 2.77, "learning_rate": 0.0001641082632832354, "loss": 1.0286, "step": 2327 }, { "epoch": 2.77, "learning_rate": 0.0001640792793391694, "loss": 0.5732, "step": 2328 }, { "epoch": 2.77, "learning_rate": 0.00016405028625879594, "loss": 1.0932, "step": 2329 }, { "epoch": 2.77, "learning_rate": 0.00016402128404624882, "loss": 1.2585, "step": 2330 }, { "epoch": 2.77, "learning_rate": 0.00016399227270566308, "loss": 0.8788, "step": 2331 }, { "epoch": 2.77, "learning_rate": 0.0001639632522411751, "loss": 1.1397, "step": 2332 }, { "epoch": 2.77, "learning_rate": 0.00016393422265692262, "loss": 1.3517, "step": 2333 }, { "epoch": 2.77, "learning_rate": 0.0001639051839570446, "loss": 1.1346, "step": 2334 }, { "epoch": 2.78, "learning_rate": 0.00016387613614568126, "loss": 0.9594, "step": 2335 }, { "epoch": 2.78, "learning_rate": 0.0001638470792269743, "loss": 1.0674, "step": 2336 }, { "epoch": 2.78, "learning_rate": 0.00016381801320506653, "loss": 0.9123, "step": 2337 }, { "epoch": 2.78, "learning_rate": 0.00016378893808410215, "loss": 1.1909, "step": 2338 }, { "epoch": 2.78, "learning_rate": 0.00016375985386822664, "loss": 1.0474, "step": 2339 }, { "epoch": 2.78, "learning_rate": 0.00016373076056158675, "loss": 0.8844, "step": 2340 }, { "epoch": 2.78, "learning_rate": 0.0001637016581683306, "loss": 1.1606, "step": 2341 }, { "epoch": 2.78, "learning_rate": 0.00016367254669260749, "loss": 0.6206, "step": 2342 }, { "epoch": 2.79, "learning_rate": 0.00016364342613856816, "loss": 0.7225, "step": 2343 }, { "epoch": 2.79, "learning_rate": 0.00016361429651036446, "loss": 1.1782, "step": 2344 }, { "epoch": 2.79, "learning_rate": 0.00016358515781214977, "loss": 1.0911, "step": 2345 }, { "epoch": 2.79, "learning_rate": 0.00016355601004807856, "loss": 1.2727, "step": 2346 }, { "epoch": 2.79, "learning_rate": 0.00016352685322230663, "loss": 0.8294, "step": 2347 }, { "epoch": 2.79, "learning_rate": 0.00016349768733899117, "loss": 1.1661, "step": 2348 }, { "epoch": 2.79, "learning_rate": 0.00016346851240229057, "loss": 0.8267, "step": 2349 }, { "epoch": 2.79, "learning_rate": 0.00016343932841636456, "loss": 1.2873, "step": 2350 }, { "epoch": 2.79, "learning_rate": 0.00016341013538537412, "loss": 1.2459, "step": 2351 }, { "epoch": 2.8, "learning_rate": 0.00016338093331348156, "loss": 0.8939, "step": 2352 }, { "epoch": 2.8, "learning_rate": 0.00016335172220485042, "loss": 1.024, "step": 2353 }, { "epoch": 2.8, "learning_rate": 0.0001633225020636456, "loss": 0.9981, "step": 2354 }, { "epoch": 2.8, "learning_rate": 0.00016329327289403325, "loss": 1.331, "step": 2355 }, { "epoch": 2.8, "learning_rate": 0.00016326403470018084, "loss": 0.7446, "step": 2356 }, { "epoch": 2.8, "learning_rate": 0.00016323478748625703, "loss": 1.1931, "step": 2357 }, { "epoch": 2.8, "learning_rate": 0.00016320553125643187, "loss": 1.1287, "step": 2358 }, { "epoch": 2.8, "learning_rate": 0.00016317626601487667, "loss": 1.109, "step": 2359 }, { "epoch": 2.81, "learning_rate": 0.00016314699176576402, "loss": 0.9946, "step": 2360 }, { "epoch": 2.81, "learning_rate": 0.00016311770851326778, "loss": 0.8347, "step": 2361 }, { "epoch": 2.81, "learning_rate": 0.00016308841626156307, "loss": 0.9214, "step": 2362 }, { "epoch": 2.81, "learning_rate": 0.0001630591150148264, "loss": 0.5907, "step": 2363 }, { "epoch": 2.81, "learning_rate": 0.00016302980477723539, "loss": 1.2412, "step": 2364 }, { "epoch": 2.81, "learning_rate": 0.00016300048555296915, "loss": 1.2908, "step": 2365 }, { "epoch": 2.81, "learning_rate": 0.00016297115734620788, "loss": 1.2345, "step": 2366 }, { "epoch": 2.81, "learning_rate": 0.00016294182016113315, "loss": 1.0418, "step": 2367 }, { "epoch": 2.82, "learning_rate": 0.00016291247400192785, "loss": 1.1457, "step": 2368 }, { "epoch": 2.82, "learning_rate": 0.00016288311887277608, "loss": 1.2529, "step": 2369 }, { "epoch": 2.82, "learning_rate": 0.00016285375477786322, "loss": 1.0013, "step": 2370 }, { "epoch": 2.82, "learning_rate": 0.00016282438172137597, "loss": 0.943, "step": 2371 }, { "epoch": 2.82, "learning_rate": 0.00016279499970750226, "loss": 0.7009, "step": 2372 }, { "epoch": 2.82, "learning_rate": 0.00016276560874043137, "loss": 0.9408, "step": 2373 }, { "epoch": 2.82, "learning_rate": 0.0001627362088243538, "loss": 1.1788, "step": 2374 }, { "epoch": 2.82, "learning_rate": 0.0001627067999634613, "loss": 0.8106, "step": 2375 }, { "epoch": 2.82, "learning_rate": 0.00016267738216194696, "loss": 1.1695, "step": 2376 }, { "epoch": 2.83, "learning_rate": 0.0001626479554240051, "loss": 0.9209, "step": 2377 }, { "epoch": 2.83, "learning_rate": 0.00016261851975383137, "loss": 0.9911, "step": 2378 }, { "epoch": 2.83, "learning_rate": 0.00016258907515562262, "loss": 1.3819, "step": 2379 }, { "epoch": 2.83, "learning_rate": 0.000162559621633577, "loss": 0.8926, "step": 2380 }, { "epoch": 2.83, "learning_rate": 0.000162530159191894, "loss": 1.0896, "step": 2381 }, { "epoch": 2.83, "learning_rate": 0.00016250068783477424, "loss": 0.8403, "step": 2382 }, { "epoch": 2.83, "learning_rate": 0.00016247120756641972, "loss": 0.7976, "step": 2383 }, { "epoch": 2.83, "learning_rate": 0.0001624417183910337, "loss": 0.8881, "step": 2384 }, { "epoch": 2.84, "learning_rate": 0.0001624122203128207, "loss": 0.8302, "step": 2385 }, { "epoch": 2.84, "learning_rate": 0.0001623827133359865, "loss": 1.3312, "step": 2386 }, { "epoch": 2.84, "learning_rate": 0.0001623531974647381, "loss": 1.003, "step": 2387 }, { "epoch": 2.84, "learning_rate": 0.0001623236727032839, "loss": 0.9487, "step": 2388 }, { "epoch": 2.84, "learning_rate": 0.00016229413905583342, "loss": 1.2259, "step": 2389 }, { "epoch": 2.84, "learning_rate": 0.00016226459652659753, "loss": 0.9327, "step": 2390 }, { "epoch": 2.84, "learning_rate": 0.00016223504511978838, "loss": 0.7336, "step": 2391 }, { "epoch": 2.84, "learning_rate": 0.00016220548483961934, "loss": 1.0454, "step": 2392 }, { "epoch": 2.85, "learning_rate": 0.00016217591569030505, "loss": 1.3371, "step": 2393 }, { "epoch": 2.85, "learning_rate": 0.00016214633767606143, "loss": 1.0814, "step": 2394 }, { "epoch": 2.85, "learning_rate": 0.00016211675080110566, "loss": 1.2274, "step": 2395 }, { "epoch": 2.85, "learning_rate": 0.0001620871550696562, "loss": 0.9775, "step": 2396 }, { "epoch": 2.85, "learning_rate": 0.00016205755048593273, "loss": 1.0323, "step": 2397 }, { "epoch": 2.85, "learning_rate": 0.00016202793705415622, "loss": 1.5101, "step": 2398 }, { "epoch": 2.85, "learning_rate": 0.00016199831477854893, "loss": 0.8118, "step": 2399 }, { "epoch": 2.85, "learning_rate": 0.0001619686836633343, "loss": 1.0233, "step": 2400 }, { "epoch": 2.85, "learning_rate": 0.00016193904371273715, "loss": 0.9038, "step": 2401 }, { "epoch": 2.86, "learning_rate": 0.00016190939493098344, "loss": 0.875, "step": 2402 }, { "epoch": 2.86, "learning_rate": 0.00016187973732230038, "loss": 1.3274, "step": 2403 }, { "epoch": 2.86, "learning_rate": 0.00016185007089091665, "loss": 1.081, "step": 2404 }, { "epoch": 2.86, "learning_rate": 0.00016182039564106192, "loss": 1.0841, "step": 2405 }, { "epoch": 2.86, "learning_rate": 0.00016179071157696728, "loss": 1.3208, "step": 2406 }, { "epoch": 2.86, "learning_rate": 0.000161761018702865, "loss": 1.1854, "step": 2407 }, { "epoch": 2.86, "learning_rate": 0.0001617313170229887, "loss": 1.0651, "step": 2408 }, { "epoch": 2.86, "learning_rate": 0.0001617016065415731, "loss": 1.1398, "step": 2409 }, { "epoch": 2.87, "learning_rate": 0.00016167188726285434, "loss": 1.2778, "step": 2410 }, { "epoch": 2.87, "learning_rate": 0.00016164215919106968, "loss": 1.6758, "step": 2411 }, { "epoch": 2.87, "learning_rate": 0.0001616124223304577, "loss": 0.8341, "step": 2412 }, { "epoch": 2.87, "learning_rate": 0.00016158267668525832, "loss": 0.9513, "step": 2413 }, { "epoch": 2.87, "learning_rate": 0.00016155292225971253, "loss": 0.9617, "step": 2414 }, { "epoch": 2.87, "learning_rate": 0.00016152315905806268, "loss": 0.8664, "step": 2415 }, { "epoch": 2.87, "learning_rate": 0.00016149338708455237, "loss": 1.331, "step": 2416 }, { "epoch": 2.87, "learning_rate": 0.00016146360634342643, "loss": 1.4212, "step": 2417 }, { "epoch": 2.88, "learning_rate": 0.00016143381683893094, "loss": 1.2126, "step": 2418 }, { "epoch": 2.88, "learning_rate": 0.00016140401857531322, "loss": 0.934, "step": 2419 }, { "epoch": 2.88, "learning_rate": 0.00016137421155682183, "loss": 1.2417, "step": 2420 }, { "epoch": 2.88, "learning_rate": 0.0001613443957877067, "loss": 1.637, "step": 2421 }, { "epoch": 2.88, "learning_rate": 0.00016131457127221881, "loss": 1.1456, "step": 2422 }, { "epoch": 2.88, "learning_rate": 0.00016128473801461053, "loss": 0.9402, "step": 2423 }, { "epoch": 2.88, "learning_rate": 0.0001612548960191354, "loss": 1.3797, "step": 2424 }, { "epoch": 2.88, "learning_rate": 0.0001612250452900483, "loss": 0.8191, "step": 2425 }, { "epoch": 2.88, "learning_rate": 0.0001611951858316052, "loss": 1.1725, "step": 2426 }, { "epoch": 2.89, "learning_rate": 0.00016116531764806346, "loss": 1.5701, "step": 2427 }, { "epoch": 2.89, "learning_rate": 0.00016113544074368164, "loss": 1.0591, "step": 2428 }, { "epoch": 2.89, "learning_rate": 0.00016110555512271953, "loss": 1.03, "step": 2429 }, { "epoch": 2.89, "learning_rate": 0.0001610756607894382, "loss": 1.1829, "step": 2430 }, { "epoch": 2.89, "learning_rate": 0.00016104575774809985, "loss": 1.2222, "step": 2431 }, { "epoch": 2.89, "learning_rate": 0.00016101584600296804, "loss": 1.1537, "step": 2432 }, { "epoch": 2.89, "learning_rate": 0.00016098592555830753, "loss": 1.0973, "step": 2433 }, { "epoch": 2.89, "learning_rate": 0.00016095599641838436, "loss": 1.0793, "step": 2434 }, { "epoch": 2.9, "learning_rate": 0.00016092605858746573, "loss": 1.3484, "step": 2435 }, { "epoch": 2.9, "learning_rate": 0.0001608961120698201, "loss": 1.1689, "step": 2436 }, { "epoch": 2.9, "learning_rate": 0.00016086615686971726, "loss": 1.0864, "step": 2437 }, { "epoch": 2.9, "learning_rate": 0.00016083619299142813, "loss": 1.2451, "step": 2438 }, { "epoch": 2.9, "learning_rate": 0.0001608062204392249, "loss": 0.9593, "step": 2439 }, { "epoch": 2.9, "learning_rate": 0.00016077623921738102, "loss": 0.9816, "step": 2440 }, { "epoch": 2.9, "learning_rate": 0.00016074624933017112, "loss": 1.0845, "step": 2441 }, { "epoch": 2.9, "learning_rate": 0.00016071625078187114, "loss": 0.9875, "step": 2442 }, { "epoch": 2.91, "learning_rate": 0.0001606862435767582, "loss": 0.8758, "step": 2443 }, { "epoch": 2.91, "learning_rate": 0.00016065622771911067, "loss": 0.9499, "step": 2444 }, { "epoch": 2.91, "learning_rate": 0.00016062620321320823, "loss": 1.1133, "step": 2445 }, { "epoch": 2.91, "learning_rate": 0.0001605961700633316, "loss": 0.7228, "step": 2446 }, { "epoch": 2.91, "learning_rate": 0.00016056612827376293, "loss": 1.2297, "step": 2447 }, { "epoch": 2.91, "learning_rate": 0.0001605360778487855, "loss": 1.0251, "step": 2448 }, { "epoch": 2.91, "learning_rate": 0.00016050601879268386, "loss": 0.8097, "step": 2449 }, { "epoch": 2.91, "learning_rate": 0.00016047595110974376, "loss": 0.9872, "step": 2450 }, { "epoch": 2.91, "learning_rate": 0.0001604458748042522, "loss": 1.1119, "step": 2451 }, { "epoch": 2.92, "learning_rate": 0.0001604157898804974, "loss": 0.8256, "step": 2452 }, { "epoch": 2.92, "learning_rate": 0.00016038569634276882, "loss": 0.9036, "step": 2453 }, { "epoch": 2.92, "learning_rate": 0.00016035559419535716, "loss": 1.1173, "step": 2454 }, { "epoch": 2.92, "learning_rate": 0.00016032548344255428, "loss": 1.3173, "step": 2455 }, { "epoch": 2.92, "learning_rate": 0.00016029536408865337, "loss": 0.717, "step": 2456 }, { "epoch": 2.92, "learning_rate": 0.00016026523613794878, "loss": 0.9806, "step": 2457 }, { "epoch": 2.92, "learning_rate": 0.00016023509959473605, "loss": 1.1509, "step": 2458 }, { "epoch": 2.92, "learning_rate": 0.00016020495446331207, "loss": 1.0454, "step": 2459 }, { "epoch": 2.93, "learning_rate": 0.0001601748007479748, "loss": 1.183, "step": 2460 }, { "epoch": 2.93, "learning_rate": 0.0001601446384530236, "loss": 1.2611, "step": 2461 }, { "epoch": 2.93, "learning_rate": 0.00016011446758275888, "loss": 1.0377, "step": 2462 }, { "epoch": 2.93, "learning_rate": 0.00016008428814148236, "loss": 1.2111, "step": 2463 }, { "epoch": 2.93, "learning_rate": 0.00016005410013349698, "loss": 1.0952, "step": 2464 }, { "epoch": 2.93, "learning_rate": 0.00016002390356310685, "loss": 0.7589, "step": 2465 }, { "epoch": 2.93, "learning_rate": 0.00015999369843461742, "loss": 0.8543, "step": 2466 }, { "epoch": 2.93, "learning_rate": 0.00015996348475233525, "loss": 1.1509, "step": 2467 }, { "epoch": 2.94, "learning_rate": 0.0001599332625205681, "loss": 1.287, "step": 2468 }, { "epoch": 2.94, "learning_rate": 0.00015990303174362512, "loss": 1.0401, "step": 2469 }, { "epoch": 2.94, "learning_rate": 0.0001598727924258164, "loss": 1.0247, "step": 2470 }, { "epoch": 2.94, "learning_rate": 0.00015984254457145354, "loss": 1.1537, "step": 2471 }, { "epoch": 2.94, "learning_rate": 0.00015981228818484917, "loss": 0.9606, "step": 2472 }, { "epoch": 2.94, "learning_rate": 0.0001597820232703172, "loss": 0.8709, "step": 2473 }, { "epoch": 2.94, "learning_rate": 0.00015975174983217275, "loss": 1.2827, "step": 2474 }, { "epoch": 2.94, "learning_rate": 0.00015972146787473213, "loss": 0.8057, "step": 2475 }, { "epoch": 2.94, "learning_rate": 0.0001596911774023129, "loss": 1.0857, "step": 2476 }, { "epoch": 2.95, "learning_rate": 0.00015966087841923386, "loss": 1.1731, "step": 2477 }, { "epoch": 2.95, "learning_rate": 0.0001596305709298149, "loss": 0.8871, "step": 2478 }, { "epoch": 2.95, "learning_rate": 0.00015960025493837727, "loss": 1.0671, "step": 2479 }, { "epoch": 2.95, "learning_rate": 0.00015956993044924334, "loss": 1.3735, "step": 2480 }, { "epoch": 2.95, "learning_rate": 0.00015953959746673675, "loss": 1.4655, "step": 2481 }, { "epoch": 2.95, "learning_rate": 0.00015950925599518228, "loss": 1.3975, "step": 2482 }, { "epoch": 2.95, "learning_rate": 0.00015947890603890602, "loss": 0.9468, "step": 2483 }, { "epoch": 2.95, "learning_rate": 0.0001594485476022352, "loss": 0.9976, "step": 2484 }, { "epoch": 2.96, "learning_rate": 0.00015941818068949818, "loss": 0.6732, "step": 2485 }, { "epoch": 2.96, "learning_rate": 0.00015938780530502474, "loss": 0.9848, "step": 2486 }, { "epoch": 2.96, "learning_rate": 0.00015935742145314568, "loss": 1.2441, "step": 2487 }, { "epoch": 2.96, "learning_rate": 0.0001593270291381931, "loss": 0.9631, "step": 2488 }, { "epoch": 2.96, "learning_rate": 0.00015929662836450029, "loss": 0.8868, "step": 2489 }, { "epoch": 2.96, "learning_rate": 0.0001592662191364017, "loss": 0.9063, "step": 2490 }, { "epoch": 2.96, "learning_rate": 0.00015923580145823303, "loss": 0.6886, "step": 2491 }, { "epoch": 2.96, "learning_rate": 0.0001592053753343312, "loss": 1.0702, "step": 2492 }, { "epoch": 2.97, "learning_rate": 0.0001591749407690343, "loss": 1.3879, "step": 2493 }, { "epoch": 2.97, "learning_rate": 0.00015914449776668167, "loss": 1.1048, "step": 2494 }, { "epoch": 2.97, "learning_rate": 0.0001591140463316137, "loss": 0.9921, "step": 2495 }, { "epoch": 2.97, "learning_rate": 0.00015908358646817225, "loss": 1.3042, "step": 2496 }, { "epoch": 2.97, "learning_rate": 0.00015905311818070015, "loss": 0.8413, "step": 2497 }, { "epoch": 2.97, "learning_rate": 0.00015902264147354153, "loss": 1.5201, "step": 2498 }, { "epoch": 2.97, "learning_rate": 0.0001589921563510417, "loss": 1.0727, "step": 2499 }, { "epoch": 2.97, "learning_rate": 0.0001589616628175472, "loss": 1.0439, "step": 2500 }, { "epoch": 2.97, "learning_rate": 0.0001589311608774057, "loss": 1.2308, "step": 2501 }, { "epoch": 2.98, "learning_rate": 0.00015890065053496613, "loss": 1.1155, "step": 2502 }, { "epoch": 2.98, "learning_rate": 0.00015887013179457862, "loss": 1.3345, "step": 2503 }, { "epoch": 2.98, "learning_rate": 0.00015883960466059444, "loss": 0.9551, "step": 2504 }, { "epoch": 2.98, "learning_rate": 0.0001588090691373661, "loss": 1.0713, "step": 2505 }, { "epoch": 2.98, "learning_rate": 0.00015877852522924732, "loss": 1.299, "step": 2506 }, { "epoch": 2.98, "learning_rate": 0.000158747972940593, "loss": 0.8535, "step": 2507 }, { "epoch": 2.98, "learning_rate": 0.0001587174122757592, "loss": 0.9924, "step": 2508 }, { "epoch": 2.98, "eval_loss": 2.328662395477295, "eval_runtime": 283.7765, "eval_samples_per_second": 0.726, "eval_steps_per_second": 0.726, "step": 2508 }, { "epoch": 2.98, "learning_rate": 0.0001586868432391032, "loss": 1.0512, "step": 2509 }, { "epoch": 2.99, "learning_rate": 0.00015865626583498355, "loss": 1.2775, "step": 2510 }, { "epoch": 2.99, "learning_rate": 0.00015862568006775983, "loss": 0.7054, "step": 2511 }, { "epoch": 2.99, "learning_rate": 0.00015859508594179294, "loss": 0.8524, "step": 2512 }, { "epoch": 2.99, "learning_rate": 0.00015856448346144496, "loss": 0.9871, "step": 2513 }, { "epoch": 2.99, "learning_rate": 0.00015853387263107909, "loss": 0.8642, "step": 2514 }, { "epoch": 2.99, "learning_rate": 0.00015850325345505975, "loss": 1.1789, "step": 2515 }, { "epoch": 2.99, "learning_rate": 0.00015847262593775266, "loss": 1.2765, "step": 2516 }, { "epoch": 2.99, "learning_rate": 0.00015844199008352458, "loss": 0.6272, "step": 2517 }, { "epoch": 3.0, "learning_rate": 0.00015841134589674352, "loss": 1.3037, "step": 2518 }, { "epoch": 3.0, "learning_rate": 0.00015838069338177863, "loss": 1.054, "step": 2519 }, { "epoch": 3.0, "learning_rate": 0.00015835003254300039, "loss": 1.1942, "step": 2520 }, { "epoch": 3.0, "learning_rate": 0.00015831936338478025, "loss": 0.8866, "step": 2521 }, { "epoch": 3.0, "learning_rate": 0.00015828868591149104, "loss": 1.1444, "step": 2522 }, { "epoch": 3.0, "learning_rate": 0.00015825800012750666, "loss": 0.8597, "step": 2523 }, { "epoch": 3.0, "learning_rate": 0.0001582273060372023, "loss": 0.7731, "step": 2524 }, { "epoch": 3.0, "learning_rate": 0.00015819660364495416, "loss": 1.1953, "step": 2525 }, { "epoch": 3.0, "learning_rate": 0.0001581658929551398, "loss": 1.3946, "step": 2526 }, { "epoch": 3.01, "learning_rate": 0.00015813517397213791, "loss": 1.0173, "step": 2527 }, { "epoch": 3.01, "learning_rate": 0.00015810444670032831, "loss": 1.1762, "step": 2528 }, { "epoch": 3.01, "learning_rate": 0.00015807371114409202, "loss": 0.7283, "step": 2529 }, { "epoch": 3.01, "learning_rate": 0.00015804296730781135, "loss": 1.1515, "step": 2530 }, { "epoch": 3.01, "learning_rate": 0.00015801221519586958, "loss": 0.9389, "step": 2531 }, { "epoch": 3.01, "learning_rate": 0.0001579814548126514, "loss": 1.1869, "step": 2532 }, { "epoch": 3.01, "learning_rate": 0.00015795068616254247, "loss": 1.2957, "step": 2533 }, { "epoch": 3.01, "learning_rate": 0.00015791990924992981, "loss": 1.0514, "step": 2534 }, { "epoch": 3.02, "learning_rate": 0.00015788912407920148, "loss": 0.6762, "step": 2535 }, { "epoch": 3.0, "learning_rate": 0.00015785833065474683, "loss": 0.4121, "step": 2536 }, { "epoch": 3.0, "learning_rate": 0.00015782752898095627, "loss": 0.4532, "step": 2537 }, { "epoch": 3.0, "learning_rate": 0.0001577967190622215, "loss": 0.4847, "step": 2538 }, { "epoch": 3.0, "learning_rate": 0.0001577659009029353, "loss": 0.8313, "step": 2539 }, { "epoch": 3.01, "learning_rate": 0.00015773507450749172, "loss": 0.5304, "step": 2540 }, { "epoch": 3.01, "learning_rate": 0.00015770423988028588, "loss": 0.6003, "step": 2541 }, { "epoch": 3.01, "learning_rate": 0.00015767339702571414, "loss": 0.3988, "step": 2542 }, { "epoch": 3.01, "learning_rate": 0.00015764254594817398, "loss": 0.6133, "step": 2543 }, { "epoch": 3.01, "learning_rate": 0.0001576116866520642, "loss": 0.4858, "step": 2544 }, { "epoch": 3.01, "learning_rate": 0.00015758081914178456, "loss": 0.3691, "step": 2545 }, { "epoch": 3.01, "learning_rate": 0.0001575499434217361, "loss": 0.5441, "step": 2546 }, { "epoch": 3.01, "learning_rate": 0.0001575190594963211, "loss": 0.4605, "step": 2547 }, { "epoch": 3.02, "learning_rate": 0.00015748816736994284, "loss": 0.3681, "step": 2548 }, { "epoch": 3.02, "learning_rate": 0.00015745726704700593, "loss": 0.4113, "step": 2549 }, { "epoch": 3.02, "learning_rate": 0.00015742635853191608, "loss": 0.5233, "step": 2550 }, { "epoch": 3.02, "learning_rate": 0.00015739544182908014, "loss": 0.356, "step": 2551 }, { "epoch": 3.02, "learning_rate": 0.00015736451694290616, "loss": 0.4105, "step": 2552 }, { "epoch": 3.02, "learning_rate": 0.00015733358387780337, "loss": 0.4451, "step": 2553 }, { "epoch": 3.02, "learning_rate": 0.00015730264263818212, "loss": 0.5023, "step": 2554 }, { "epoch": 3.02, "learning_rate": 0.000157271693228454, "loss": 0.3671, "step": 2555 }, { "epoch": 3.03, "learning_rate": 0.0001572407356530317, "loss": 0.7077, "step": 2556 }, { "epoch": 3.03, "learning_rate": 0.00015720976991632913, "loss": 0.4439, "step": 2557 }, { "epoch": 3.03, "learning_rate": 0.00015717879602276122, "loss": 0.5961, "step": 2558 }, { "epoch": 3.03, "learning_rate": 0.0001571478139767443, "loss": 0.4269, "step": 2559 }, { "epoch": 3.03, "learning_rate": 0.00015711682378269565, "loss": 0.3427, "step": 2560 }, { "epoch": 3.03, "learning_rate": 0.00015708582544503386, "loss": 0.5736, "step": 2561 }, { "epoch": 3.03, "learning_rate": 0.00015705481896817854, "loss": 0.3707, "step": 2562 }, { "epoch": 3.03, "learning_rate": 0.0001570238043565506, "loss": 0.4076, "step": 2563 }, { "epoch": 3.03, "learning_rate": 0.000156992781614572, "loss": 0.6514, "step": 2564 }, { "epoch": 3.04, "learning_rate": 0.00015696175074666598, "loss": 0.4012, "step": 2565 }, { "epoch": 3.04, "learning_rate": 0.0001569307117572568, "loss": 0.3492, "step": 2566 }, { "epoch": 3.04, "learning_rate": 0.00015689966465076992, "loss": 0.4121, "step": 2567 }, { "epoch": 3.04, "learning_rate": 0.00015686860943163206, "loss": 0.5769, "step": 2568 }, { "epoch": 3.04, "learning_rate": 0.00015683754610427094, "loss": 0.4872, "step": 2569 }, { "epoch": 3.04, "learning_rate": 0.00015680647467311557, "loss": 0.5518, "step": 2570 }, { "epoch": 3.04, "learning_rate": 0.00015677539514259608, "loss": 0.411, "step": 2571 }, { "epoch": 3.04, "learning_rate": 0.00015674430751714361, "loss": 0.3443, "step": 2572 }, { "epoch": 3.05, "learning_rate": 0.00015671321180119074, "loss": 0.3706, "step": 2573 }, { "epoch": 3.05, "learning_rate": 0.0001566821079991709, "loss": 0.6168, "step": 2574 }, { "epoch": 3.05, "learning_rate": 0.0001566509961155189, "loss": 0.3726, "step": 2575 }, { "epoch": 3.05, "learning_rate": 0.00015661987615467058, "loss": 0.3976, "step": 2576 }, { "epoch": 3.05, "learning_rate": 0.00015658874812106297, "loss": 0.3697, "step": 2577 }, { "epoch": 3.05, "learning_rate": 0.00015655761201913425, "loss": 0.2759, "step": 2578 }, { "epoch": 3.05, "learning_rate": 0.00015652646785332378, "loss": 0.3572, "step": 2579 }, { "epoch": 3.05, "learning_rate": 0.000156495315628072, "loss": 0.5333, "step": 2580 }, { "epoch": 3.06, "learning_rate": 0.00015646415534782056, "loss": 0.4004, "step": 2581 }, { "epoch": 3.06, "learning_rate": 0.0001564329870170122, "loss": 0.4736, "step": 2582 }, { "epoch": 3.06, "learning_rate": 0.00015640181064009088, "loss": 0.4814, "step": 2583 }, { "epoch": 3.06, "learning_rate": 0.00015637062622150168, "loss": 0.3351, "step": 2584 }, { "epoch": 3.06, "learning_rate": 0.00015633943376569081, "loss": 0.4497, "step": 2585 }, { "epoch": 3.06, "learning_rate": 0.00015630823327710558, "loss": 0.4202, "step": 2586 }, { "epoch": 3.06, "learning_rate": 0.00015627702476019457, "loss": 0.5934, "step": 2587 }, { "epoch": 3.06, "learning_rate": 0.0001562458082194074, "loss": 0.4664, "step": 2588 }, { "epoch": 3.06, "learning_rate": 0.00015621458365919487, "loss": 0.4077, "step": 2589 }, { "epoch": 3.07, "learning_rate": 0.00015618335108400893, "loss": 0.5244, "step": 2590 }, { "epoch": 3.07, "learning_rate": 0.00015615211049830268, "loss": 0.5042, "step": 2591 }, { "epoch": 3.07, "learning_rate": 0.00015612086190653027, "loss": 0.3442, "step": 2592 }, { "epoch": 3.07, "learning_rate": 0.00015608960531314717, "loss": 0.6337, "step": 2593 }, { "epoch": 3.07, "learning_rate": 0.00015605834072260984, "loss": 0.3542, "step": 2594 }, { "epoch": 3.07, "learning_rate": 0.0001560270681393759, "loss": 0.5113, "step": 2595 }, { "epoch": 3.07, "learning_rate": 0.0001559957875679042, "loss": 0.4346, "step": 2596 }, { "epoch": 3.07, "learning_rate": 0.00015596449901265463, "loss": 0.5231, "step": 2597 }, { "epoch": 3.08, "learning_rate": 0.00015593320247808822, "loss": 0.5193, "step": 2598 }, { "epoch": 3.08, "learning_rate": 0.0001559018979686673, "loss": 0.3575, "step": 2599 }, { "epoch": 3.08, "learning_rate": 0.00015587058548885505, "loss": 0.6356, "step": 2600 }, { "epoch": 3.08, "learning_rate": 0.00015583926504311605, "loss": 0.3313, "step": 2601 }, { "epoch": 3.08, "learning_rate": 0.00015580793663591585, "loss": 0.356, "step": 2602 }, { "epoch": 3.08, "learning_rate": 0.00015577660027172127, "loss": 0.5498, "step": 2603 }, { "epoch": 3.08, "learning_rate": 0.0001557452559550001, "loss": 0.3973, "step": 2604 }, { "epoch": 3.08, "learning_rate": 0.0001557139036902215, "loss": 0.4751, "step": 2605 }, { "epoch": 3.09, "learning_rate": 0.00015568254348185544, "loss": 0.4297, "step": 2606 }, { "epoch": 3.09, "learning_rate": 0.00015565117533437335, "loss": 0.4299, "step": 2607 }, { "epoch": 3.09, "learning_rate": 0.00015561979925224754, "loss": 0.4651, "step": 2608 }, { "epoch": 3.09, "learning_rate": 0.00015558841523995162, "loss": 0.474, "step": 2609 }, { "epoch": 3.09, "learning_rate": 0.00015555702330196023, "loss": 0.4143, "step": 2610 }, { "epoch": 3.09, "learning_rate": 0.0001555256234427492, "loss": 0.393, "step": 2611 }, { "epoch": 3.09, "learning_rate": 0.00015549421566679546, "loss": 0.3738, "step": 2612 }, { "epoch": 3.09, "learning_rate": 0.00015546279997857704, "loss": 0.4394, "step": 2613 }, { "epoch": 3.09, "learning_rate": 0.0001554313763825732, "loss": 0.3702, "step": 2614 }, { "epoch": 3.1, "learning_rate": 0.00015539994488326418, "loss": 0.4594, "step": 2615 }, { "epoch": 3.1, "learning_rate": 0.00015536850548513147, "loss": 0.3249, "step": 2616 }, { "epoch": 3.1, "learning_rate": 0.00015533705819265764, "loss": 0.3857, "step": 2617 }, { "epoch": 3.1, "learning_rate": 0.0001553056030103264, "loss": 0.3272, "step": 2618 }, { "epoch": 3.1, "learning_rate": 0.00015527413994262257, "loss": 0.5204, "step": 2619 }, { "epoch": 3.1, "learning_rate": 0.00015524266899403206, "loss": 0.3653, "step": 2620 }, { "epoch": 3.1, "learning_rate": 0.000155211190169042, "loss": 0.4698, "step": 2621 }, { "epoch": 3.1, "learning_rate": 0.0001551797034721405, "loss": 0.5949, "step": 2622 }, { "epoch": 3.11, "learning_rate": 0.00015514820890781693, "loss": 0.4074, "step": 2623 }, { "epoch": 3.11, "learning_rate": 0.00015511670648056178, "loss": 0.3586, "step": 2624 }, { "epoch": 3.11, "learning_rate": 0.0001550851961948665, "loss": 0.6494, "step": 2625 }, { "epoch": 3.11, "learning_rate": 0.00015505367805522383, "loss": 0.4914, "step": 2626 }, { "epoch": 3.11, "learning_rate": 0.0001550221520661276, "loss": 0.4594, "step": 2627 }, { "epoch": 3.11, "learning_rate": 0.00015499061823207266, "loss": 0.4102, "step": 2628 }, { "epoch": 3.11, "learning_rate": 0.00015495907655755506, "loss": 0.4229, "step": 2629 }, { "epoch": 3.11, "learning_rate": 0.000154927527047072, "loss": 0.7218, "step": 2630 }, { "epoch": 3.12, "learning_rate": 0.0001548959697051217, "loss": 0.6929, "step": 2631 }, { "epoch": 3.12, "learning_rate": 0.00015486440453620358, "loss": 0.3628, "step": 2632 }, { "epoch": 3.12, "learning_rate": 0.00015483283154481815, "loss": 0.4433, "step": 2633 }, { "epoch": 3.12, "learning_rate": 0.00015480125073546704, "loss": 0.3912, "step": 2634 }, { "epoch": 3.12, "learning_rate": 0.0001547696621126529, "loss": 0.3682, "step": 2635 }, { "epoch": 3.12, "learning_rate": 0.00015473806568087968, "loss": 0.354, "step": 2636 }, { "epoch": 3.12, "learning_rate": 0.0001547064614446523, "loss": 0.4789, "step": 2637 }, { "epoch": 3.12, "learning_rate": 0.0001546748494084768, "loss": 0.382, "step": 2638 }, { "epoch": 3.12, "learning_rate": 0.00015464322957686041, "loss": 0.4954, "step": 2639 }, { "epoch": 3.13, "learning_rate": 0.00015461160195431148, "loss": 0.3273, "step": 2640 }, { "epoch": 3.13, "learning_rate": 0.0001545799665453393, "loss": 0.3414, "step": 2641 }, { "epoch": 3.13, "learning_rate": 0.00015454832335445447, "loss": 0.5479, "step": 2642 }, { "epoch": 3.13, "learning_rate": 0.0001545166723861686, "loss": 0.4963, "step": 2643 }, { "epoch": 3.13, "learning_rate": 0.00015448501364499445, "loss": 0.5547, "step": 2644 }, { "epoch": 3.13, "learning_rate": 0.0001544533471354458, "loss": 0.4637, "step": 2645 }, { "epoch": 3.13, "learning_rate": 0.00015442167286203767, "loss": 0.4248, "step": 2646 }, { "epoch": 3.13, "learning_rate": 0.00015438999082928608, "loss": 0.4213, "step": 2647 }, { "epoch": 3.14, "learning_rate": 0.00015435830104170822, "loss": 0.3734, "step": 2648 }, { "epoch": 3.14, "learning_rate": 0.00015432660350382234, "loss": 0.4627, "step": 2649 }, { "epoch": 3.14, "learning_rate": 0.0001542948982201479, "loss": 0.3422, "step": 2650 }, { "epoch": 3.14, "learning_rate": 0.00015426318519520525, "loss": 0.4409, "step": 2651 }, { "epoch": 3.14, "learning_rate": 0.00015423146443351607, "loss": 0.3717, "step": 2652 }, { "epoch": 3.14, "learning_rate": 0.00015419973593960298, "loss": 0.4349, "step": 2653 }, { "epoch": 3.14, "learning_rate": 0.00015416799971798985, "loss": 0.5349, "step": 2654 }, { "epoch": 3.14, "learning_rate": 0.0001541362557732015, "loss": 0.4511, "step": 2655 }, { "epoch": 3.15, "learning_rate": 0.000154104504109764, "loss": 0.5997, "step": 2656 }, { "epoch": 3.15, "learning_rate": 0.00015407274473220434, "loss": 0.661, "step": 2657 }, { "epoch": 3.15, "learning_rate": 0.00015404097764505083, "loss": 0.3456, "step": 2658 }, { "epoch": 3.15, "learning_rate": 0.00015400920285283268, "loss": 0.3416, "step": 2659 }, { "epoch": 3.15, "learning_rate": 0.00015397742036008034, "loss": 0.4707, "step": 2660 }, { "epoch": 3.15, "learning_rate": 0.00015394563017132526, "loss": 0.3221, "step": 2661 }, { "epoch": 3.15, "learning_rate": 0.00015391383229110007, "loss": 0.6108, "step": 2662 }, { "epoch": 3.15, "learning_rate": 0.00015388202672393834, "loss": 0.5504, "step": 2663 }, { "epoch": 3.15, "learning_rate": 0.00015385021347437498, "loss": 0.3973, "step": 2664 }, { "epoch": 3.16, "learning_rate": 0.00015381839254694583, "loss": 0.5149, "step": 2665 }, { "epoch": 3.16, "learning_rate": 0.00015378656394618787, "loss": 0.5853, "step": 2666 }, { "epoch": 3.16, "learning_rate": 0.0001537547276766391, "loss": 0.517, "step": 2667 }, { "epoch": 3.16, "learning_rate": 0.00015372288374283875, "loss": 0.5485, "step": 2668 }, { "epoch": 3.16, "learning_rate": 0.00015369103214932703, "loss": 0.4907, "step": 2669 }, { "epoch": 3.16, "learning_rate": 0.0001536591729006453, "loss": 0.3169, "step": 2670 }, { "epoch": 3.16, "learning_rate": 0.00015362730600133596, "loss": 0.5431, "step": 2671 }, { "epoch": 3.16, "learning_rate": 0.00015359543145594258, "loss": 0.2586, "step": 2672 }, { "epoch": 3.17, "learning_rate": 0.00015356354926900979, "loss": 0.5251, "step": 2673 }, { "epoch": 3.17, "learning_rate": 0.00015353165944508325, "loss": 0.4104, "step": 2674 }, { "epoch": 3.17, "learning_rate": 0.00015349976198870973, "loss": 0.4825, "step": 2675 }, { "epoch": 3.17, "learning_rate": 0.00015346785690443718, "loss": 0.5274, "step": 2676 }, { "epoch": 3.17, "learning_rate": 0.0001534359441968145, "loss": 0.3878, "step": 2677 }, { "epoch": 3.17, "learning_rate": 0.0001534040238703918, "loss": 0.5132, "step": 2678 }, { "epoch": 3.17, "learning_rate": 0.00015337209592972023, "loss": 0.5145, "step": 2679 }, { "epoch": 3.17, "learning_rate": 0.00015334016037935196, "loss": 0.5548, "step": 2680 }, { "epoch": 3.18, "learning_rate": 0.00015330821722384037, "loss": 0.7494, "step": 2681 }, { "epoch": 3.18, "learning_rate": 0.00015327626646773976, "loss": 0.5569, "step": 2682 }, { "epoch": 3.18, "learning_rate": 0.00015324430811560573, "loss": 0.2622, "step": 2683 }, { "epoch": 3.18, "learning_rate": 0.0001532123421719948, "loss": 0.3749, "step": 2684 }, { "epoch": 3.18, "learning_rate": 0.00015318036864146457, "loss": 0.3959, "step": 2685 }, { "epoch": 3.18, "learning_rate": 0.0001531483875285738, "loss": 0.5243, "step": 2686 }, { "epoch": 3.18, "learning_rate": 0.0001531163988378823, "loss": 0.3115, "step": 2687 }, { "epoch": 3.18, "learning_rate": 0.00015308440257395093, "loss": 0.2385, "step": 2688 }, { "epoch": 3.18, "learning_rate": 0.00015305239874134174, "loss": 0.4431, "step": 2689 }, { "epoch": 3.19, "learning_rate": 0.0001530203873446177, "loss": 0.378, "step": 2690 }, { "epoch": 3.19, "learning_rate": 0.00015298836838834298, "loss": 0.4521, "step": 2691 }, { "epoch": 3.19, "learning_rate": 0.00015295634187708279, "loss": 0.6309, "step": 2692 }, { "epoch": 3.19, "learning_rate": 0.00015292430781540335, "loss": 0.4355, "step": 2693 }, { "epoch": 3.19, "learning_rate": 0.00015289226620787208, "loss": 0.4537, "step": 2694 }, { "epoch": 3.19, "learning_rate": 0.0001528602170590574, "loss": 0.4305, "step": 2695 }, { "epoch": 3.19, "learning_rate": 0.00015282816037352878, "loss": 0.5355, "step": 2696 }, { "epoch": 3.19, "learning_rate": 0.00015279609615585687, "loss": 0.5243, "step": 2697 }, { "epoch": 3.2, "learning_rate": 0.0001527640244106133, "loss": 0.5334, "step": 2698 }, { "epoch": 3.2, "learning_rate": 0.00015273194514237078, "loss": 0.5409, "step": 2699 }, { "epoch": 3.2, "learning_rate": 0.0001526998583557031, "loss": 0.4042, "step": 2700 }, { "epoch": 3.2, "learning_rate": 0.00015266776405518524, "loss": 0.5536, "step": 2701 }, { "epoch": 3.2, "learning_rate": 0.000152635662245393, "loss": 0.2743, "step": 2702 }, { "epoch": 3.2, "learning_rate": 0.00015260355293090353, "loss": 0.4762, "step": 2703 }, { "epoch": 3.2, "learning_rate": 0.00015257143611629482, "loss": 0.4552, "step": 2704 }, { "epoch": 3.2, "learning_rate": 0.0001525393118061461, "loss": 0.5395, "step": 2705 }, { "epoch": 3.21, "learning_rate": 0.0001525071800050375, "loss": 0.4297, "step": 2706 }, { "epoch": 3.21, "learning_rate": 0.00015247504071755046, "loss": 0.364, "step": 2707 }, { "epoch": 3.21, "learning_rate": 0.00015244289394826722, "loss": 0.9499, "step": 2708 }, { "epoch": 3.21, "learning_rate": 0.00015241073970177126, "loss": 0.579, "step": 2709 }, { "epoch": 3.21, "learning_rate": 0.000152378577982647, "loss": 0.3111, "step": 2710 }, { "epoch": 3.21, "learning_rate": 0.0001523464087954801, "loss": 0.3345, "step": 2711 }, { "epoch": 3.21, "learning_rate": 0.00015231423214485715, "loss": 0.4628, "step": 2712 }, { "epoch": 3.21, "learning_rate": 0.00015228204803536586, "loss": 0.4803, "step": 2713 }, { "epoch": 3.21, "learning_rate": 0.0001522498564715949, "loss": 0.4164, "step": 2714 }, { "epoch": 3.22, "learning_rate": 0.00015221765745813417, "loss": 0.6468, "step": 2715 }, { "epoch": 3.22, "learning_rate": 0.00015218545099957449, "loss": 0.4495, "step": 2716 }, { "epoch": 3.22, "learning_rate": 0.00015215323710050785, "loss": 0.4184, "step": 2717 }, { "epoch": 3.22, "eval_loss": 2.9206559658050537, "eval_runtime": 283.9002, "eval_samples_per_second": 0.726, "eval_steps_per_second": 0.726, "step": 2717 }, { "epoch": 3.22, "learning_rate": 0.00015212101576552722, "loss": 0.4215, "step": 2718 }, { "epoch": 3.22, "learning_rate": 0.00015208878699922668, "loss": 1.4488, "step": 2719 }, { "epoch": 3.22, "learning_rate": 0.0001520565508062013, "loss": 0.4449, "step": 2720 }, { "epoch": 3.22, "learning_rate": 0.0001520243071910473, "loss": 0.2853, "step": 2721 }, { "epoch": 3.22, "learning_rate": 0.00015199205615836191, "loss": 0.4572, "step": 2722 }, { "epoch": 3.23, "learning_rate": 0.00015195979771274342, "loss": 0.4436, "step": 2723 }, { "epoch": 3.23, "learning_rate": 0.0001519275318587912, "loss": 0.38, "step": 2724 }, { "epoch": 3.23, "learning_rate": 0.00015189525860110563, "loss": 0.4956, "step": 2725 }, { "epoch": 3.23, "learning_rate": 0.00015186297794428816, "loss": 0.8514, "step": 2726 }, { "epoch": 3.23, "learning_rate": 0.00015183068989294132, "loss": 0.4518, "step": 2727 }, { "epoch": 3.23, "learning_rate": 0.00015179839445166873, "loss": 0.5581, "step": 2728 }, { "epoch": 3.23, "learning_rate": 0.00015176609162507497, "loss": 0.5828, "step": 2729 }, { "epoch": 3.23, "learning_rate": 0.00015173378141776568, "loss": 0.4109, "step": 2730 }, { "epoch": 3.24, "learning_rate": 0.00015170146383434768, "loss": 0.5762, "step": 2731 }, { "epoch": 3.24, "learning_rate": 0.00015166913887942868, "loss": 0.4502, "step": 2732 }, { "epoch": 3.24, "learning_rate": 0.00015163680655761757, "loss": 0.3736, "step": 2733 }, { "epoch": 3.24, "learning_rate": 0.00015160446687352417, "loss": 0.3771, "step": 2734 }, { "epoch": 3.24, "learning_rate": 0.00015157211983175947, "loss": 0.469, "step": 2735 }, { "epoch": 3.24, "learning_rate": 0.00015153976543693542, "loss": 0.665, "step": 2736 }, { "epoch": 3.24, "learning_rate": 0.00015150740369366508, "loss": 0.3495, "step": 2737 }, { "epoch": 3.24, "learning_rate": 0.0001514750346065625, "loss": 0.4513, "step": 2738 }, { "epoch": 3.24, "learning_rate": 0.0001514426581802428, "loss": 0.4571, "step": 2739 }, { "epoch": 3.25, "learning_rate": 0.00015141027441932216, "loss": 0.4197, "step": 2740 }, { "epoch": 3.25, "learning_rate": 0.00015137788332841785, "loss": 0.3396, "step": 2741 }, { "epoch": 3.25, "learning_rate": 0.00015134548491214806, "loss": 0.3547, "step": 2742 }, { "epoch": 3.25, "learning_rate": 0.00015131307917513214, "loss": 0.3073, "step": 2743 }, { "epoch": 3.25, "learning_rate": 0.00015128066612199044, "loss": 0.7091, "step": 2744 }, { "epoch": 3.25, "learning_rate": 0.00015124824575734428, "loss": 0.2845, "step": 2745 }, { "epoch": 3.25, "learning_rate": 0.00015121581808581622, "loss": 0.2903, "step": 2746 }, { "epoch": 3.25, "learning_rate": 0.00015118338311202964, "loss": 0.4065, "step": 2747 }, { "epoch": 3.26, "learning_rate": 0.00015115094084060916, "loss": 0.6152, "step": 2748 }, { "epoch": 3.26, "learning_rate": 0.00015111849127618022, "loss": 0.5352, "step": 2749 }, { "epoch": 3.26, "learning_rate": 0.0001510860344233695, "loss": 0.414, "step": 2750 }, { "epoch": 3.26, "learning_rate": 0.00015105357028680457, "loss": 0.4756, "step": 2751 }, { "epoch": 3.26, "learning_rate": 0.00015102109887111422, "loss": 0.4644, "step": 2752 }, { "epoch": 3.26, "learning_rate": 0.00015098862018092808, "loss": 0.4231, "step": 2753 }, { "epoch": 3.26, "learning_rate": 0.00015095613422087692, "loss": 0.4617, "step": 2754 }, { "epoch": 3.26, "learning_rate": 0.0001509236409955925, "loss": 0.5876, "step": 2755 }, { "epoch": 3.27, "learning_rate": 0.0001508911405097077, "loss": 0.5696, "step": 2756 }, { "epoch": 3.27, "learning_rate": 0.00015085863276785637, "loss": 0.3826, "step": 2757 }, { "epoch": 3.27, "learning_rate": 0.0001508261177746734, "loss": 0.4338, "step": 2758 }, { "epoch": 3.27, "learning_rate": 0.0001507935955347947, "loss": 0.3546, "step": 2759 }, { "epoch": 3.27, "learning_rate": 0.00015076106605285724, "loss": 0.413, "step": 2760 }, { "epoch": 3.27, "learning_rate": 0.000150728529333499, "loss": 0.3954, "step": 2761 }, { "epoch": 3.27, "learning_rate": 0.00015069598538135906, "loss": 0.5214, "step": 2762 }, { "epoch": 3.27, "learning_rate": 0.0001506634342010774, "loss": 0.5239, "step": 2763 }, { "epoch": 3.27, "learning_rate": 0.00015063087579729519, "loss": 0.8681, "step": 2764 }, { "epoch": 3.28, "learning_rate": 0.00015059831017465449, "loss": 0.4616, "step": 2765 }, { "epoch": 3.28, "learning_rate": 0.00015056573733779848, "loss": 0.4721, "step": 2766 }, { "epoch": 3.28, "learning_rate": 0.00015053315729137128, "loss": 0.4449, "step": 2767 }, { "epoch": 3.28, "learning_rate": 0.0001505005700400182, "loss": 0.569, "step": 2768 }, { "epoch": 3.28, "learning_rate": 0.00015046797558838535, "loss": 0.4926, "step": 2769 }, { "epoch": 3.28, "learning_rate": 0.00015043537394112007, "loss": 0.462, "step": 2770 }, { "epoch": 3.28, "learning_rate": 0.00015040276510287063, "loss": 0.6983, "step": 2771 }, { "epoch": 3.28, "learning_rate": 0.00015037014907828632, "loss": 0.4644, "step": 2772 }, { "epoch": 3.29, "learning_rate": 0.0001503375258720175, "loss": 0.5924, "step": 2773 }, { "epoch": 3.29, "learning_rate": 0.00015030489548871544, "loss": 0.5282, "step": 2774 }, { "epoch": 3.29, "learning_rate": 0.00015027225793303264, "loss": 0.4757, "step": 2775 }, { "epoch": 3.29, "learning_rate": 0.00015023961320962247, "loss": 0.5014, "step": 2776 }, { "epoch": 3.29, "learning_rate": 0.0001502069613231393, "loss": 0.3455, "step": 2777 }, { "epoch": 3.29, "learning_rate": 0.00015017430227823864, "loss": 0.4525, "step": 2778 }, { "epoch": 3.29, "learning_rate": 0.0001501416360795769, "loss": 0.51, "step": 2779 }, { "epoch": 3.29, "learning_rate": 0.00015010896273181165, "loss": 0.3766, "step": 2780 }, { "epoch": 3.3, "learning_rate": 0.0001500762822396013, "loss": 0.3162, "step": 2781 }, { "epoch": 3.3, "learning_rate": 0.00015004359460760546, "loss": 0.406, "step": 2782 }, { "epoch": 3.3, "learning_rate": 0.00015001089984048463, "loss": 0.4671, "step": 2783 }, { "epoch": 3.3, "learning_rate": 0.00014997819794290034, "loss": 0.4299, "step": 2784 }, { "epoch": 3.3, "learning_rate": 0.00014994548891951524, "loss": 0.5494, "step": 2785 }, { "epoch": 3.3, "learning_rate": 0.0001499127727749929, "loss": 0.351, "step": 2786 }, { "epoch": 3.3, "learning_rate": 0.00014988004951399785, "loss": 0.3807, "step": 2787 }, { "epoch": 3.3, "learning_rate": 0.00014984731914119586, "loss": 0.3999, "step": 2788 }, { "epoch": 3.3, "learning_rate": 0.0001498145816612534, "loss": 0.7609, "step": 2789 }, { "epoch": 3.31, "learning_rate": 0.00014978183707883827, "loss": 0.4466, "step": 2790 }, { "epoch": 3.31, "learning_rate": 0.00014974908539861905, "loss": 0.592, "step": 2791 }, { "epoch": 3.31, "learning_rate": 0.00014971632662526545, "loss": 0.4786, "step": 2792 }, { "epoch": 3.31, "learning_rate": 0.00014968356076344814, "loss": 0.4087, "step": 2793 }, { "epoch": 3.31, "learning_rate": 0.0001496507878178388, "loss": 0.3811, "step": 2794 }, { "epoch": 3.31, "learning_rate": 0.00014961800779311014, "loss": 0.4091, "step": 2795 }, { "epoch": 3.31, "learning_rate": 0.00014958522069393593, "loss": 0.6861, "step": 2796 }, { "epoch": 3.31, "learning_rate": 0.00014955242652499084, "loss": 0.3346, "step": 2797 }, { "epoch": 3.32, "learning_rate": 0.00014951962529095064, "loss": 0.5417, "step": 2798 }, { "epoch": 3.32, "learning_rate": 0.000149486816996492, "loss": 0.7325, "step": 2799 }, { "epoch": 3.32, "learning_rate": 0.00014945400164629278, "loss": 0.5007, "step": 2800 }, { "epoch": 3.32, "learning_rate": 0.00014942117924503164, "loss": 0.4217, "step": 2801 }, { "epoch": 3.32, "learning_rate": 0.00014938834979738835, "loss": 0.5265, "step": 2802 }, { "epoch": 3.32, "learning_rate": 0.00014935551330804372, "loss": 0.4376, "step": 2803 }, { "epoch": 3.32, "learning_rate": 0.0001493226697816795, "loss": 0.5068, "step": 2804 }, { "epoch": 3.32, "learning_rate": 0.00014928981922297842, "loss": 0.6248, "step": 2805 }, { "epoch": 3.33, "learning_rate": 0.0001492569616366243, "loss": 0.593, "step": 2806 }, { "epoch": 3.33, "learning_rate": 0.0001492240970273019, "loss": 0.6713, "step": 2807 }, { "epoch": 3.33, "learning_rate": 0.00014919122539969697, "loss": 0.5736, "step": 2808 }, { "epoch": 3.33, "learning_rate": 0.00014915834675849633, "loss": 0.3006, "step": 2809 }, { "epoch": 3.33, "learning_rate": 0.00014912546110838775, "loss": 0.5175, "step": 2810 }, { "epoch": 3.33, "learning_rate": 0.00014909256845405998, "loss": 0.52, "step": 2811 }, { "epoch": 3.33, "learning_rate": 0.00014905966880020282, "loss": 0.5491, "step": 2812 }, { "epoch": 3.33, "learning_rate": 0.00014902676215150702, "loss": 0.6007, "step": 2813 }, { "epoch": 3.33, "learning_rate": 0.0001489938485126644, "loss": 0.6552, "step": 2814 }, { "epoch": 3.34, "learning_rate": 0.00014896092788836763, "loss": 0.3624, "step": 2815 }, { "epoch": 3.34, "learning_rate": 0.0001489280002833106, "loss": 0.2626, "step": 2816 }, { "epoch": 3.34, "learning_rate": 0.00014889506570218796, "loss": 0.409, "step": 2817 }, { "epoch": 3.34, "learning_rate": 0.00014886212414969553, "loss": 0.473, "step": 2818 }, { "epoch": 3.34, "learning_rate": 0.00014882917563052998, "loss": 0.4205, "step": 2819 }, { "epoch": 3.34, "learning_rate": 0.00014879622014938915, "loss": 0.4603, "step": 2820 }, { "epoch": 3.34, "learning_rate": 0.0001487632577109717, "loss": 0.3522, "step": 2821 }, { "epoch": 3.34, "learning_rate": 0.0001487302883199774, "loss": 0.3787, "step": 2822 }, { "epoch": 3.35, "learning_rate": 0.00014869731198110695, "loss": 0.6, "step": 2823 }, { "epoch": 3.35, "learning_rate": 0.000148664328699062, "loss": 0.4291, "step": 2824 }, { "epoch": 3.35, "learning_rate": 0.00014863133847854533, "loss": 0.4358, "step": 2825 }, { "epoch": 3.35, "learning_rate": 0.0001485983413242606, "loss": 0.4144, "step": 2826 }, { "epoch": 3.35, "learning_rate": 0.0001485653372409125, "loss": 0.842, "step": 2827 }, { "epoch": 3.35, "learning_rate": 0.00014853232623320662, "loss": 0.3398, "step": 2828 }, { "epoch": 3.35, "learning_rate": 0.00014849930830584972, "loss": 0.5005, "step": 2829 }, { "epoch": 3.35, "learning_rate": 0.00014846628346354933, "loss": 0.5777, "step": 2830 }, { "epoch": 3.36, "learning_rate": 0.00014843325171101413, "loss": 0.3953, "step": 2831 }, { "epoch": 3.36, "learning_rate": 0.00014840021305295373, "loss": 0.4056, "step": 2832 }, { "epoch": 3.36, "learning_rate": 0.00014836716749407872, "loss": 0.7682, "step": 2833 }, { "epoch": 3.36, "learning_rate": 0.0001483341150391006, "loss": 0.3208, "step": 2834 }, { "epoch": 3.36, "learning_rate": 0.00014830105569273204, "loss": 0.4317, "step": 2835 }, { "epoch": 3.36, "learning_rate": 0.00014826798945968654, "loss": 0.363, "step": 2836 }, { "epoch": 3.36, "learning_rate": 0.00014823491634467862, "loss": 0.3784, "step": 2837 }, { "epoch": 3.36, "learning_rate": 0.00014820183635242374, "loss": 0.9267, "step": 2838 }, { "epoch": 3.36, "learning_rate": 0.0001481687494876385, "loss": 0.4245, "step": 2839 }, { "epoch": 3.37, "learning_rate": 0.00014813565575504022, "loss": 0.3929, "step": 2840 }, { "epoch": 3.37, "learning_rate": 0.00014810255515934747, "loss": 0.5171, "step": 2841 }, { "epoch": 3.37, "learning_rate": 0.00014806944770527958, "loss": 0.5181, "step": 2842 }, { "epoch": 3.37, "learning_rate": 0.00014803633339755703, "loss": 0.4765, "step": 2843 }, { "epoch": 3.37, "learning_rate": 0.00014800321224090114, "loss": 0.4433, "step": 2844 }, { "epoch": 3.37, "learning_rate": 0.00014797008424003428, "loss": 0.461, "step": 2845 }, { "epoch": 3.37, "learning_rate": 0.0001479369493996798, "loss": 0.5688, "step": 2846 }, { "epoch": 3.37, "learning_rate": 0.00014790380772456197, "loss": 0.4822, "step": 2847 }, { "epoch": 3.38, "learning_rate": 0.0001478706592194061, "loss": 0.4993, "step": 2848 }, { "epoch": 3.38, "learning_rate": 0.00014783750388893842, "loss": 0.3967, "step": 2849 }, { "epoch": 3.38, "learning_rate": 0.00014780434173788617, "loss": 0.4708, "step": 2850 }, { "epoch": 3.38, "learning_rate": 0.00014777117277097758, "loss": 0.5721, "step": 2851 }, { "epoch": 3.38, "learning_rate": 0.00014773799699294176, "loss": 0.5276, "step": 2852 }, { "epoch": 3.38, "learning_rate": 0.00014770481440850891, "loss": 0.4135, "step": 2853 }, { "epoch": 3.38, "learning_rate": 0.0001476716250224101, "loss": 0.716, "step": 2854 }, { "epoch": 3.38, "learning_rate": 0.00014763842883937743, "loss": 0.3663, "step": 2855 }, { "epoch": 3.39, "learning_rate": 0.00014760522586414396, "loss": 0.4105, "step": 2856 }, { "epoch": 3.39, "learning_rate": 0.00014757201610144372, "loss": 0.4554, "step": 2857 }, { "epoch": 3.39, "learning_rate": 0.00014753879955601163, "loss": 0.4366, "step": 2858 }, { "epoch": 3.39, "learning_rate": 0.0001475055762325837, "loss": 0.3752, "step": 2859 }, { "epoch": 3.39, "learning_rate": 0.00014747234613589685, "loss": 0.3747, "step": 2860 }, { "epoch": 3.39, "learning_rate": 0.000147439109270689, "loss": 0.5533, "step": 2861 }, { "epoch": 3.39, "learning_rate": 0.00014740586564169892, "loss": 0.4962, "step": 2862 }, { "epoch": 3.39, "learning_rate": 0.00014737261525366648, "loss": 0.5318, "step": 2863 }, { "epoch": 3.39, "learning_rate": 0.00014733935811133244, "loss": 0.4592, "step": 2864 }, { "epoch": 3.4, "learning_rate": 0.00014730609421943855, "loss": 0.429, "step": 2865 }, { "epoch": 3.4, "learning_rate": 0.00014727282358272754, "loss": 0.4163, "step": 2866 }, { "epoch": 3.4, "learning_rate": 0.00014723954620594304, "loss": 0.4811, "step": 2867 }, { "epoch": 3.4, "learning_rate": 0.0001472062620938297, "loss": 0.4662, "step": 2868 }, { "epoch": 3.4, "learning_rate": 0.00014717297125113311, "loss": 0.531, "step": 2869 }, { "epoch": 3.4, "learning_rate": 0.0001471396736825998, "loss": 0.3233, "step": 2870 }, { "epoch": 3.4, "learning_rate": 0.00014710636939297724, "loss": 0.4171, "step": 2871 }, { "epoch": 3.4, "learning_rate": 0.000147073058387014, "loss": 0.5412, "step": 2872 }, { "epoch": 3.41, "learning_rate": 0.00014703974066945943, "loss": 0.4357, "step": 2873 }, { "epoch": 3.41, "learning_rate": 0.00014700641624506392, "loss": 0.3889, "step": 2874 }, { "epoch": 3.41, "learning_rate": 0.0001469730851185788, "loss": 0.456, "step": 2875 }, { "epoch": 3.41, "learning_rate": 0.00014693974729475636, "loss": 0.4365, "step": 2876 }, { "epoch": 3.41, "learning_rate": 0.0001469064027783499, "loss": 0.3947, "step": 2877 }, { "epoch": 3.41, "learning_rate": 0.00014687305157411355, "loss": 0.5718, "step": 2878 }, { "epoch": 3.41, "learning_rate": 0.0001468396936868025, "loss": 0.4652, "step": 2879 }, { "epoch": 3.41, "learning_rate": 0.00014680632912117286, "loss": 0.4242, "step": 2880 }, { "epoch": 3.42, "learning_rate": 0.0001467729578819817, "loss": 0.5045, "step": 2881 }, { "epoch": 3.42, "learning_rate": 0.00014673957997398695, "loss": 0.4098, "step": 2882 }, { "epoch": 3.42, "learning_rate": 0.00014670619540194766, "loss": 0.597, "step": 2883 }, { "epoch": 3.42, "learning_rate": 0.00014667280417062374, "loss": 0.5208, "step": 2884 }, { "epoch": 3.42, "learning_rate": 0.00014663940628477598, "loss": 0.4881, "step": 2885 }, { "epoch": 3.42, "learning_rate": 0.00014660600174916627, "loss": 0.5234, "step": 2886 }, { "epoch": 3.42, "learning_rate": 0.0001465725905685573, "loss": 0.439, "step": 2887 }, { "epoch": 3.42, "learning_rate": 0.00014653917274771284, "loss": 0.4498, "step": 2888 }, { "epoch": 3.42, "learning_rate": 0.00014650574829139747, "loss": 0.4837, "step": 2889 }, { "epoch": 3.43, "learning_rate": 0.00014647231720437686, "loss": 0.4232, "step": 2890 }, { "epoch": 3.43, "learning_rate": 0.00014643887949141753, "loss": 0.4467, "step": 2891 }, { "epoch": 3.43, "learning_rate": 0.00014640543515728695, "loss": 0.3566, "step": 2892 }, { "epoch": 3.43, "learning_rate": 0.00014637198420675354, "loss": 0.3888, "step": 2893 }, { "epoch": 3.43, "learning_rate": 0.00014633852664458673, "loss": 0.326, "step": 2894 }, { "epoch": 3.43, "learning_rate": 0.0001463050624755568, "loss": 0.3608, "step": 2895 }, { "epoch": 3.43, "learning_rate": 0.00014627159170443502, "loss": 0.5326, "step": 2896 }, { "epoch": 3.43, "learning_rate": 0.00014623811433599359, "loss": 0.3171, "step": 2897 }, { "epoch": 3.44, "learning_rate": 0.00014620463037500568, "loss": 0.4619, "step": 2898 }, { "epoch": 3.44, "learning_rate": 0.00014617113982624526, "loss": 0.7739, "step": 2899 }, { "epoch": 3.44, "learning_rate": 0.00014613764269448751, "loss": 0.4327, "step": 2900 }, { "epoch": 3.44, "learning_rate": 0.0001461041389845083, "loss": 0.6078, "step": 2901 }, { "epoch": 3.44, "learning_rate": 0.00014607062870108456, "loss": 0.3863, "step": 2902 }, { "epoch": 3.44, "learning_rate": 0.00014603711184899408, "loss": 0.4787, "step": 2903 }, { "epoch": 3.44, "learning_rate": 0.00014600358843301568, "loss": 0.2997, "step": 2904 }, { "epoch": 3.44, "learning_rate": 0.00014597005845792905, "loss": 0.3657, "step": 2905 }, { "epoch": 3.45, "learning_rate": 0.00014593652192851486, "loss": 0.334, "step": 2906 }, { "epoch": 3.45, "learning_rate": 0.00014590297884955463, "loss": 0.6809, "step": 2907 }, { "epoch": 3.45, "learning_rate": 0.0001458694292258309, "loss": 0.4739, "step": 2908 }, { "epoch": 3.45, "learning_rate": 0.00014583587306212713, "loss": 0.4139, "step": 2909 }, { "epoch": 3.45, "learning_rate": 0.00014580231036322768, "loss": 0.3307, "step": 2910 }, { "epoch": 3.45, "learning_rate": 0.00014576874113391789, "loss": 0.4155, "step": 2911 }, { "epoch": 3.45, "learning_rate": 0.00014573516537898394, "loss": 0.4461, "step": 2912 }, { "epoch": 3.45, "learning_rate": 0.00014570158310321305, "loss": 0.4775, "step": 2913 }, { "epoch": 3.45, "learning_rate": 0.0001456679943113933, "loss": 0.344, "step": 2914 }, { "epoch": 3.46, "learning_rate": 0.00014563439900831373, "loss": 0.3568, "step": 2915 }, { "epoch": 3.46, "learning_rate": 0.00014560079719876424, "loss": 0.3808, "step": 2916 }, { "epoch": 3.46, "learning_rate": 0.0001455671888875358, "loss": 0.5467, "step": 2917 }, { "epoch": 3.46, "learning_rate": 0.00014553357407942022, "loss": 0.5267, "step": 2918 }, { "epoch": 3.46, "learning_rate": 0.00014549995277921015, "loss": 0.4476, "step": 2919 }, { "epoch": 3.46, "learning_rate": 0.00014546632499169937, "loss": 0.4463, "step": 2920 }, { "epoch": 3.46, "learning_rate": 0.00014543269072168235, "loss": 0.5553, "step": 2921 }, { "epoch": 3.46, "learning_rate": 0.00014539904997395468, "loss": 0.5476, "step": 2922 }, { "epoch": 3.47, "learning_rate": 0.0001453654027533128, "loss": 0.4443, "step": 2923 }, { "epoch": 3.47, "learning_rate": 0.00014533174906455404, "loss": 0.4353, "step": 2924 }, { "epoch": 3.47, "learning_rate": 0.00014529808891247667, "loss": 0.4479, "step": 2925 }, { "epoch": 3.47, "learning_rate": 0.00014526442230187995, "loss": 0.3951, "step": 2926 }, { "epoch": 3.47, "eval_loss": 2.882225751876831, "eval_runtime": 283.9462, "eval_samples_per_second": 0.725, "eval_steps_per_second": 0.725, "step": 2926 }, { "epoch": 3.47, "learning_rate": 0.00014523074923756394, "loss": 0.679, "step": 2927 }, { "epoch": 3.47, "learning_rate": 0.0001451970697243297, "loss": 0.4178, "step": 2928 }, { "epoch": 3.47, "learning_rate": 0.0001451633837669792, "loss": 0.4121, "step": 2929 }, { "epoch": 3.47, "learning_rate": 0.00014512969137031538, "loss": 0.3929, "step": 2930 }, { "epoch": 3.48, "learning_rate": 0.00014509599253914195, "loss": 0.366, "step": 2931 }, { "epoch": 3.48, "learning_rate": 0.0001450622872782637, "loss": 0.3528, "step": 2932 }, { "epoch": 3.48, "learning_rate": 0.00014502857559248617, "loss": 0.5003, "step": 2933 }, { "epoch": 3.48, "learning_rate": 0.00014499485748661604, "loss": 0.4901, "step": 2934 }, { "epoch": 3.48, "learning_rate": 0.00014496113296546067, "loss": 0.4538, "step": 2935 }, { "epoch": 3.48, "learning_rate": 0.00014492740203382847, "loss": 0.4549, "step": 2936 }, { "epoch": 3.48, "learning_rate": 0.0001448936646965288, "loss": 0.5464, "step": 2937 }, { "epoch": 3.48, "learning_rate": 0.00014485992095837177, "loss": 0.43, "step": 2938 }, { "epoch": 3.48, "learning_rate": 0.00014482617082416858, "loss": 0.5893, "step": 2939 }, { "epoch": 3.49, "learning_rate": 0.0001447924142987312, "loss": 0.4947, "step": 2940 }, { "epoch": 3.49, "learning_rate": 0.00014475865138687262, "loss": 0.4903, "step": 2941 }, { "epoch": 3.49, "learning_rate": 0.0001447248820934067, "loss": 0.4933, "step": 2942 }, { "epoch": 3.49, "learning_rate": 0.00014469110642314817, "loss": 0.4516, "step": 2943 }, { "epoch": 3.49, "learning_rate": 0.0001446573243809127, "loss": 0.469, "step": 2944 }, { "epoch": 3.49, "learning_rate": 0.00014462353597151684, "loss": 0.6531, "step": 2945 }, { "epoch": 3.49, "learning_rate": 0.00014458974119977818, "loss": 0.2754, "step": 2946 }, { "epoch": 3.49, "learning_rate": 0.0001445559400705151, "loss": 0.5676, "step": 2947 }, { "epoch": 3.5, "learning_rate": 0.00014452213258854684, "loss": 0.5903, "step": 2948 }, { "epoch": 3.5, "learning_rate": 0.00014448831875869364, "loss": 0.5022, "step": 2949 }, { "epoch": 3.5, "learning_rate": 0.0001444544985857766, "loss": 0.3509, "step": 2950 }, { "epoch": 3.5, "learning_rate": 0.00014442067207461775, "loss": 0.3921, "step": 2951 }, { "epoch": 3.5, "learning_rate": 0.00014438683923004005, "loss": 0.4997, "step": 2952 }, { "epoch": 3.5, "learning_rate": 0.00014435300005686728, "loss": 0.6218, "step": 2953 }, { "epoch": 3.5, "learning_rate": 0.00014431915455992414, "loss": 0.4097, "step": 2954 }, { "epoch": 3.5, "learning_rate": 0.00014428530274403632, "loss": 0.3478, "step": 2955 }, { "epoch": 3.51, "learning_rate": 0.00014425144461403035, "loss": 0.4506, "step": 2956 }, { "epoch": 3.51, "learning_rate": 0.00014421758017473362, "loss": 0.4025, "step": 2957 }, { "epoch": 3.51, "learning_rate": 0.00014418370943097448, "loss": 0.3838, "step": 2958 }, { "epoch": 3.51, "learning_rate": 0.00014414983238758217, "loss": 0.6366, "step": 2959 }, { "epoch": 3.51, "learning_rate": 0.00014411594904938682, "loss": 0.4649, "step": 2960 }, { "epoch": 3.51, "learning_rate": 0.00014408205942121943, "loss": 0.3361, "step": 2961 }, { "epoch": 3.51, "learning_rate": 0.00014404816350791188, "loss": 0.3692, "step": 2962 }, { "epoch": 3.51, "learning_rate": 0.0001440142613142971, "loss": 0.6162, "step": 2963 }, { "epoch": 3.52, "learning_rate": 0.00014398035284520874, "loss": 0.5935, "step": 2964 }, { "epoch": 3.52, "learning_rate": 0.0001439464381054814, "loss": 0.545, "step": 2965 }, { "epoch": 3.52, "learning_rate": 0.00014391251709995061, "loss": 0.4178, "step": 2966 }, { "epoch": 3.52, "learning_rate": 0.00014387858983345276, "loss": 0.5552, "step": 2967 }, { "epoch": 3.52, "learning_rate": 0.0001438446563108251, "loss": 0.4506, "step": 2968 }, { "epoch": 3.52, "learning_rate": 0.00014381071653690587, "loss": 0.429, "step": 2969 }, { "epoch": 3.52, "learning_rate": 0.00014377677051653404, "loss": 0.3897, "step": 2970 }, { "epoch": 3.52, "learning_rate": 0.0001437428182545497, "loss": 0.4663, "step": 2971 }, { "epoch": 3.52, "learning_rate": 0.00014370885975579364, "loss": 0.4643, "step": 2972 }, { "epoch": 3.53, "learning_rate": 0.0001436748950251076, "loss": 0.5433, "step": 2973 }, { "epoch": 3.53, "learning_rate": 0.0001436409240673342, "loss": 0.4967, "step": 2974 }, { "epoch": 3.53, "learning_rate": 0.000143606946887317, "loss": 0.3717, "step": 2975 }, { "epoch": 3.53, "learning_rate": 0.00014357296348990037, "loss": 0.4166, "step": 2976 }, { "epoch": 3.53, "learning_rate": 0.0001435389738799296, "loss": 0.455, "step": 2977 }, { "epoch": 3.53, "learning_rate": 0.00014350497806225087, "loss": 0.4603, "step": 2978 }, { "epoch": 3.53, "learning_rate": 0.00014347097604171127, "loss": 0.4325, "step": 2979 }, { "epoch": 3.53, "learning_rate": 0.0001434369678231587, "loss": 0.4375, "step": 2980 }, { "epoch": 3.54, "learning_rate": 0.00014340295341144202, "loss": 0.4932, "step": 2981 }, { "epoch": 3.54, "learning_rate": 0.00014336893281141096, "loss": 0.5264, "step": 2982 }, { "epoch": 3.54, "learning_rate": 0.00014333490602791608, "loss": 0.4677, "step": 2983 }, { "epoch": 3.54, "learning_rate": 0.00014330087306580887, "loss": 0.6505, "step": 2984 }, { "epoch": 3.54, "learning_rate": 0.00014326683392994167, "loss": 0.4451, "step": 2985 }, { "epoch": 3.54, "learning_rate": 0.00014323278862516775, "loss": 0.4025, "step": 2986 }, { "epoch": 3.54, "learning_rate": 0.0001431987371563412, "loss": 0.5084, "step": 2987 }, { "epoch": 3.54, "learning_rate": 0.000143164679528317, "loss": 0.4806, "step": 2988 }, { "epoch": 3.55, "learning_rate": 0.00014313061574595115, "loss": 0.3954, "step": 2989 }, { "epoch": 3.55, "learning_rate": 0.00014309654581410024, "loss": 0.4339, "step": 2990 }, { "epoch": 3.55, "learning_rate": 0.000143062469737622, "loss": 0.6739, "step": 2991 }, { "epoch": 3.55, "learning_rate": 0.00014302838752137487, "loss": 0.6414, "step": 2992 }, { "epoch": 3.55, "learning_rate": 0.00014299429917021827, "loss": 0.5075, "step": 2993 }, { "epoch": 3.55, "learning_rate": 0.00014296020468901246, "loss": 0.4105, "step": 2994 }, { "epoch": 3.55, "learning_rate": 0.00014292610408261856, "loss": 0.7371, "step": 2995 }, { "epoch": 3.55, "learning_rate": 0.00014289199735589852, "loss": 0.7485, "step": 2996 }, { "epoch": 3.55, "learning_rate": 0.00014285788451371534, "loss": 0.7629, "step": 2997 }, { "epoch": 3.56, "learning_rate": 0.00014282376556093264, "loss": 0.3849, "step": 2998 }, { "epoch": 3.56, "learning_rate": 0.00014278964050241512, "loss": 0.5355, "step": 2999 }, { "epoch": 3.56, "learning_rate": 0.00014275550934302823, "loss": 0.4077, "step": 3000 }, { "epoch": 3.56, "learning_rate": 0.00014272137208763832, "loss": 0.5352, "step": 3001 }, { "epoch": 3.56, "learning_rate": 0.00014268722874111265, "loss": 0.5257, "step": 3002 }, { "epoch": 3.56, "learning_rate": 0.00014265307930831932, "loss": 0.4265, "step": 3003 }, { "epoch": 3.56, "learning_rate": 0.00014261892379412728, "loss": 0.5776, "step": 3004 }, { "epoch": 3.56, "learning_rate": 0.0001425847622034063, "loss": 0.3521, "step": 3005 }, { "epoch": 3.57, "learning_rate": 0.00014255059454102722, "loss": 0.6203, "step": 3006 }, { "epoch": 3.57, "learning_rate": 0.00014251642081186146, "loss": 0.5238, "step": 3007 }, { "epoch": 3.57, "learning_rate": 0.00014248224102078152, "loss": 0.3887, "step": 3008 }, { "epoch": 3.57, "learning_rate": 0.00014244805517266067, "loss": 0.5001, "step": 3009 }, { "epoch": 3.57, "learning_rate": 0.0001424138632723731, "loss": 0.555, "step": 3010 }, { "epoch": 3.57, "learning_rate": 0.0001423796653247938, "loss": 0.6137, "step": 3011 }, { "epoch": 3.57, "learning_rate": 0.00014234546133479867, "loss": 0.8052, "step": 3012 }, { "epoch": 3.57, "learning_rate": 0.0001423112513072644, "loss": 0.5392, "step": 3013 }, { "epoch": 3.58, "learning_rate": 0.00014227703524706867, "loss": 0.5067, "step": 3014 }, { "epoch": 3.58, "learning_rate": 0.0001422428131590899, "loss": 0.4016, "step": 3015 }, { "epoch": 3.58, "learning_rate": 0.00014220858504820742, "loss": 0.4165, "step": 3016 }, { "epoch": 3.58, "learning_rate": 0.00014217435091930141, "loss": 0.7395, "step": 3017 }, { "epoch": 3.58, "learning_rate": 0.00014214011077725292, "loss": 0.4985, "step": 3018 }, { "epoch": 3.58, "learning_rate": 0.00014210586462694384, "loss": 0.4821, "step": 3019 }, { "epoch": 3.58, "learning_rate": 0.00014207161247325691, "loss": 0.6046, "step": 3020 }, { "epoch": 3.58, "learning_rate": 0.00014203735432107576, "loss": 0.568, "step": 3021 }, { "epoch": 3.58, "learning_rate": 0.00014200309017528486, "loss": 0.7383, "step": 3022 }, { "epoch": 3.59, "learning_rate": 0.0001419688200407695, "loss": 0.5296, "step": 3023 }, { "epoch": 3.59, "learning_rate": 0.00014193454392241592, "loss": 0.6391, "step": 3024 }, { "epoch": 3.59, "learning_rate": 0.00014190026182511102, "loss": 0.4523, "step": 3025 }, { "epoch": 3.59, "learning_rate": 0.0001418659737537428, "loss": 0.482, "step": 3026 }, { "epoch": 3.59, "learning_rate": 0.00014183167971319998, "loss": 0.4519, "step": 3027 }, { "epoch": 3.59, "learning_rate": 0.00014179737970837207, "loss": 0.4156, "step": 3028 }, { "epoch": 3.59, "learning_rate": 0.00014176307374414956, "loss": 0.5142, "step": 3029 }, { "epoch": 3.59, "learning_rate": 0.00014172876182542372, "loss": 0.4068, "step": 3030 }, { "epoch": 3.6, "learning_rate": 0.00014169444395708666, "loss": 0.5908, "step": 3031 }, { "epoch": 3.6, "learning_rate": 0.0001416601201440314, "loss": 0.511, "step": 3032 }, { "epoch": 3.6, "learning_rate": 0.00014162579039115174, "loss": 0.5165, "step": 3033 }, { "epoch": 3.6, "learning_rate": 0.00014159145470334235, "loss": 0.4449, "step": 3034 }, { "epoch": 3.6, "learning_rate": 0.00014155711308549878, "loss": 0.4808, "step": 3035 }, { "epoch": 3.6, "learning_rate": 0.00014152276554251736, "loss": 0.5365, "step": 3036 }, { "epoch": 3.6, "learning_rate": 0.00014148841207929527, "loss": 0.6016, "step": 3037 }, { "epoch": 3.6, "learning_rate": 0.0001414540527007307, "loss": 0.379, "step": 3038 }, { "epoch": 3.61, "learning_rate": 0.00014141968741172238, "loss": 0.6687, "step": 3039 }, { "epoch": 3.61, "learning_rate": 0.00014138531621717018, "loss": 0.6219, "step": 3040 }, { "epoch": 3.61, "learning_rate": 0.0001413509391219746, "loss": 0.3408, "step": 3041 }, { "epoch": 3.61, "learning_rate": 0.00014131655613103708, "loss": 0.5148, "step": 3042 }, { "epoch": 3.61, "learning_rate": 0.0001412821672492599, "loss": 0.3811, "step": 3043 }, { "epoch": 3.61, "learning_rate": 0.0001412477724815462, "loss": 0.4691, "step": 3044 }, { "epoch": 3.61, "learning_rate": 0.00014121337183279988, "loss": 0.6919, "step": 3045 }, { "epoch": 3.61, "learning_rate": 0.0001411789653079257, "loss": 0.5804, "step": 3046 }, { "epoch": 3.61, "learning_rate": 0.00014114455291182933, "loss": 0.418, "step": 3047 }, { "epoch": 3.62, "learning_rate": 0.0001411101346494172, "loss": 0.4422, "step": 3048 }, { "epoch": 3.62, "learning_rate": 0.0001410757105255966, "loss": 0.389, "step": 3049 }, { "epoch": 3.62, "learning_rate": 0.0001410412805452757, "loss": 0.4083, "step": 3050 }, { "epoch": 3.62, "learning_rate": 0.0001410068447133634, "loss": 0.8703, "step": 3051 }, { "epoch": 3.62, "learning_rate": 0.00014097240303476954, "loss": 0.4724, "step": 3052 }, { "epoch": 3.62, "learning_rate": 0.00014093795551440474, "loss": 0.6257, "step": 3053 }, { "epoch": 3.62, "learning_rate": 0.00014090350215718048, "loss": 0.5212, "step": 3054 }, { "epoch": 3.62, "learning_rate": 0.00014086904296800902, "loss": 0.4429, "step": 3055 }, { "epoch": 3.63, "learning_rate": 0.00014083457795180355, "loss": 0.3496, "step": 3056 }, { "epoch": 3.63, "learning_rate": 0.00014080010711347798, "loss": 0.3402, "step": 3057 }, { "epoch": 3.63, "learning_rate": 0.0001407656304579471, "loss": 0.4783, "step": 3058 }, { "epoch": 3.63, "learning_rate": 0.00014073114799012653, "loss": 0.3987, "step": 3059 }, { "epoch": 3.63, "learning_rate": 0.00014069665971493274, "loss": 0.4755, "step": 3060 }, { "epoch": 3.63, "learning_rate": 0.00014066216563728303, "loss": 0.4792, "step": 3061 }, { "epoch": 3.63, "learning_rate": 0.00014062766576209546, "loss": 0.4275, "step": 3062 }, { "epoch": 3.63, "learning_rate": 0.00014059316009428893, "loss": 0.3598, "step": 3063 }, { "epoch": 3.64, "learning_rate": 0.00014055864863878325, "loss": 0.4887, "step": 3064 }, { "epoch": 3.64, "learning_rate": 0.000140524131400499, "loss": 0.5421, "step": 3065 }, { "epoch": 3.64, "learning_rate": 0.00014048960838435753, "loss": 0.352, "step": 3066 }, { "epoch": 3.64, "learning_rate": 0.00014045507959528118, "loss": 0.3124, "step": 3067 }, { "epoch": 3.64, "learning_rate": 0.00014042054503819287, "loss": 0.3955, "step": 3068 }, { "epoch": 3.64, "learning_rate": 0.00014038600471801658, "loss": 0.455, "step": 3069 }, { "epoch": 3.64, "learning_rate": 0.00014035145863967692, "loss": 0.5177, "step": 3070 }, { "epoch": 3.64, "learning_rate": 0.00014031690680809945, "loss": 0.4205, "step": 3071 }, { "epoch": 3.64, "learning_rate": 0.00014028234922821054, "loss": 0.4832, "step": 3072 }, { "epoch": 3.65, "learning_rate": 0.0001402477859049373, "loss": 0.3496, "step": 3073 }, { "epoch": 3.65, "learning_rate": 0.0001402132168432077, "loss": 0.5404, "step": 3074 }, { "epoch": 3.65, "learning_rate": 0.00014017864204795058, "loss": 0.5106, "step": 3075 }, { "epoch": 3.65, "learning_rate": 0.0001401440615240955, "loss": 0.6611, "step": 3076 }, { "epoch": 3.65, "learning_rate": 0.00014010947527657295, "loss": 0.3879, "step": 3077 }, { "epoch": 3.65, "learning_rate": 0.0001400748833103141, "loss": 0.3054, "step": 3078 }, { "epoch": 3.65, "learning_rate": 0.00014004028563025108, "loss": 0.3461, "step": 3079 }, { "epoch": 3.65, "learning_rate": 0.0001400056822413167, "loss": 0.482, "step": 3080 }, { "epoch": 3.66, "learning_rate": 0.0001399710731484447, "loss": 0.3285, "step": 3081 }, { "epoch": 3.66, "learning_rate": 0.00013993645835656953, "loss": 0.363, "step": 3082 }, { "epoch": 3.66, "learning_rate": 0.00013990183787062661, "loss": 0.5092, "step": 3083 }, { "epoch": 3.66, "learning_rate": 0.00013986721169555194, "loss": 0.3009, "step": 3084 }, { "epoch": 3.66, "learning_rate": 0.00013983257983628253, "loss": 0.3831, "step": 3085 }, { "epoch": 3.66, "learning_rate": 0.0001397979422977561, "loss": 0.3718, "step": 3086 }, { "epoch": 3.66, "learning_rate": 0.00013976329908491118, "loss": 0.3401, "step": 3087 }, { "epoch": 3.66, "learning_rate": 0.00013972865020268722, "loss": 0.5294, "step": 3088 }, { "epoch": 3.67, "learning_rate": 0.00013969399565602435, "loss": 0.5054, "step": 3089 }, { "epoch": 3.67, "learning_rate": 0.0001396593354498635, "loss": 0.4247, "step": 3090 }, { "epoch": 3.67, "learning_rate": 0.00013962466958914658, "loss": 0.431, "step": 3091 }, { "epoch": 3.67, "learning_rate": 0.00013958999807881604, "loss": 0.6341, "step": 3092 }, { "epoch": 3.67, "learning_rate": 0.0001395553209238154, "loss": 0.5126, "step": 3093 }, { "epoch": 3.67, "learning_rate": 0.00013952063812908881, "loss": 0.3775, "step": 3094 }, { "epoch": 3.67, "learning_rate": 0.0001394859496995813, "loss": 0.5149, "step": 3095 }, { "epoch": 3.67, "learning_rate": 0.00013945125564023868, "loss": 0.2879, "step": 3096 }, { "epoch": 3.67, "learning_rate": 0.00013941655595600756, "loss": 0.5621, "step": 3097 }, { "epoch": 3.68, "learning_rate": 0.00013938185065183532, "loss": 0.408, "step": 3098 }, { "epoch": 3.68, "learning_rate": 0.00013934713973267024, "loss": 0.4247, "step": 3099 }, { "epoch": 3.68, "learning_rate": 0.0001393124232034613, "loss": 0.4224, "step": 3100 }, { "epoch": 3.68, "learning_rate": 0.0001392777010691584, "loss": 0.4142, "step": 3101 }, { "epoch": 3.68, "learning_rate": 0.00013924297333471204, "loss": 0.6004, "step": 3102 }, { "epoch": 3.68, "learning_rate": 0.00013920824000507374, "loss": 0.6016, "step": 3103 }, { "epoch": 3.68, "learning_rate": 0.0001391735010851956, "loss": 0.4669, "step": 3104 }, { "epoch": 3.68, "learning_rate": 0.00013913875658003074, "loss": 0.3987, "step": 3105 }, { "epoch": 3.69, "learning_rate": 0.0001391040064945329, "loss": 0.471, "step": 3106 }, { "epoch": 3.69, "learning_rate": 0.0001390692508336568, "loss": 0.6135, "step": 3107 }, { "epoch": 3.69, "learning_rate": 0.00013903448960235766, "loss": 0.5369, "step": 3108 }, { "epoch": 3.69, "learning_rate": 0.00013899972280559183, "loss": 0.3295, "step": 3109 }, { "epoch": 3.69, "learning_rate": 0.0001389649504483162, "loss": 0.309, "step": 3110 }, { "epoch": 3.69, "learning_rate": 0.00013893017253548858, "loss": 0.4026, "step": 3111 }, { "epoch": 3.69, "learning_rate": 0.00013889538907206755, "loss": 0.4724, "step": 3112 }, { "epoch": 3.69, "learning_rate": 0.0001388606000630125, "loss": 0.3606, "step": 3113 }, { "epoch": 3.7, "learning_rate": 0.0001388258055132835, "loss": 0.4894, "step": 3114 }, { "epoch": 3.7, "learning_rate": 0.0001387910054278416, "loss": 0.4832, "step": 3115 }, { "epoch": 3.7, "learning_rate": 0.0001387561998116484, "loss": 0.4604, "step": 3116 }, { "epoch": 3.7, "learning_rate": 0.00013872138866966656, "loss": 0.4377, "step": 3117 }, { "epoch": 3.7, "learning_rate": 0.00013868657200685934, "loss": 0.3965, "step": 3118 }, { "epoch": 3.7, "learning_rate": 0.0001386517498281908, "loss": 0.7653, "step": 3119 }, { "epoch": 3.7, "learning_rate": 0.00013861692213862584, "loss": 0.5213, "step": 3120 }, { "epoch": 3.7, "learning_rate": 0.00013858208894313017, "loss": 0.9296, "step": 3121 }, { "epoch": 3.7, "learning_rate": 0.00013854725024667016, "loss": 0.7738, "step": 3122 }, { "epoch": 3.71, "learning_rate": 0.00013851240605421315, "loss": 0.5826, "step": 3123 }, { "epoch": 3.71, "learning_rate": 0.0001384775563707271, "loss": 0.5502, "step": 3124 }, { "epoch": 3.71, "learning_rate": 0.00013844270120118085, "loss": 0.3535, "step": 3125 }, { "epoch": 3.71, "learning_rate": 0.0001384078405505439, "loss": 0.4853, "step": 3126 }, { "epoch": 3.71, "learning_rate": 0.00013837297442378675, "loss": 0.5819, "step": 3127 }, { "epoch": 3.71, "learning_rate": 0.00013833810282588044, "loss": 0.3728, "step": 3128 }, { "epoch": 3.71, "learning_rate": 0.00013830322576179697, "loss": 0.3327, "step": 3129 }, { "epoch": 3.71, "learning_rate": 0.000138268343236509, "loss": 0.4618, "step": 3130 }, { "epoch": 3.72, "learning_rate": 0.00013823345525499004, "loss": 0.3377, "step": 3131 }, { "epoch": 3.72, "learning_rate": 0.00013819856182221434, "loss": 0.3154, "step": 3132 }, { "epoch": 3.72, "learning_rate": 0.00013816366294315695, "loss": 0.5116, "step": 3133 }, { "epoch": 3.72, "learning_rate": 0.0001381287586227937, "loss": 0.4987, "step": 3134 }, { "epoch": 3.72, "learning_rate": 0.00013809384886610118, "loss": 0.5596, "step": 3135 }, { "epoch": 3.72, "eval_loss": 2.939779281616211, "eval_runtime": 283.9953, "eval_samples_per_second": 0.725, "eval_steps_per_second": 0.725, "step": 3135 }, { "epoch": 3.72, "learning_rate": 0.00013805893367805678, "loss": 0.5128, "step": 3136 }, { "epoch": 3.72, "learning_rate": 0.0001380240130636386, "loss": 0.3149, "step": 3137 }, { "epoch": 3.72, "learning_rate": 0.00013798908702782558, "loss": 0.4984, "step": 3138 }, { "epoch": 3.73, "learning_rate": 0.0001379541555755974, "loss": 0.626, "step": 3139 }, { "epoch": 3.73, "learning_rate": 0.00013791921871193457, "loss": 0.4949, "step": 3140 }, { "epoch": 3.73, "learning_rate": 0.00013788427644181823, "loss": 0.5654, "step": 3141 }, { "epoch": 3.73, "learning_rate": 0.0001378493287702305, "loss": 0.4197, "step": 3142 }, { "epoch": 3.73, "learning_rate": 0.00013781437570215406, "loss": 0.4341, "step": 3143 }, { "epoch": 3.73, "learning_rate": 0.00013777941724257253, "loss": 0.3576, "step": 3144 }, { "epoch": 3.73, "learning_rate": 0.00013774445339647014, "loss": 0.3098, "step": 3145 }, { "epoch": 3.73, "learning_rate": 0.00013770948416883205, "loss": 0.6052, "step": 3146 }, { "epoch": 3.73, "learning_rate": 0.00013767450956464407, "loss": 0.4327, "step": 3147 }, { "epoch": 3.74, "learning_rate": 0.00013763952958889287, "loss": 0.4717, "step": 3148 }, { "epoch": 3.74, "learning_rate": 0.0001376045442465657, "loss": 0.5263, "step": 3149 }, { "epoch": 3.74, "learning_rate": 0.00013756955354265085, "loss": 0.5021, "step": 3150 }, { "epoch": 3.74, "learning_rate": 0.00013753455748213714, "loss": 0.4066, "step": 3151 }, { "epoch": 3.74, "learning_rate": 0.00013749955607001433, "loss": 0.3461, "step": 3152 }, { "epoch": 3.74, "learning_rate": 0.00013746454931127278, "loss": 0.4318, "step": 3153 }, { "epoch": 3.74, "learning_rate": 0.00013742953721090372, "loss": 0.4195, "step": 3154 }, { "epoch": 3.74, "learning_rate": 0.0001373945197738991, "loss": 0.3862, "step": 3155 }, { "epoch": 3.75, "learning_rate": 0.00013735949700525163, "loss": 0.5916, "step": 3156 }, { "epoch": 3.75, "learning_rate": 0.00013732446890995484, "loss": 0.5336, "step": 3157 }, { "epoch": 3.75, "learning_rate": 0.00013728943549300295, "loss": 0.4104, "step": 3158 }, { "epoch": 3.75, "learning_rate": 0.00013725439675939095, "loss": 0.541, "step": 3159 }, { "epoch": 3.75, "learning_rate": 0.00013721935271411464, "loss": 0.5173, "step": 3160 }, { "epoch": 3.75, "learning_rate": 0.00013718430336217045, "loss": 0.3866, "step": 3161 }, { "epoch": 3.75, "learning_rate": 0.00013714924870855571, "loss": 0.6113, "step": 3162 }, { "epoch": 3.75, "learning_rate": 0.00013711418875826846, "loss": 0.5817, "step": 3163 }, { "epoch": 3.76, "learning_rate": 0.0001370791235163075, "loss": 0.5331, "step": 3164 }, { "epoch": 3.76, "learning_rate": 0.00013704405298767229, "loss": 0.5744, "step": 3165 }, { "epoch": 3.76, "learning_rate": 0.0001370089771773632, "loss": 0.494, "step": 3166 }, { "epoch": 3.76, "learning_rate": 0.00013697389609038124, "loss": 0.4537, "step": 3167 }, { "epoch": 3.76, "learning_rate": 0.00013693880973172822, "loss": 0.5494, "step": 3168 }, { "epoch": 3.76, "learning_rate": 0.00013690371810640665, "loss": 0.537, "step": 3169 }, { "epoch": 3.76, "learning_rate": 0.0001368686212194199, "loss": 0.4698, "step": 3170 }, { "epoch": 3.76, "learning_rate": 0.00013683351907577194, "loss": 0.5254, "step": 3171 }, { "epoch": 3.76, "learning_rate": 0.00013679841168046767, "loss": 0.3857, "step": 3172 }, { "epoch": 3.77, "learning_rate": 0.00013676329903851254, "loss": 0.4464, "step": 3173 }, { "epoch": 3.77, "learning_rate": 0.0001367281811549129, "loss": 0.5651, "step": 3174 }, { "epoch": 3.77, "learning_rate": 0.0001366930580346758, "loss": 0.4192, "step": 3175 }, { "epoch": 3.77, "learning_rate": 0.000136657929682809, "loss": 0.3364, "step": 3176 }, { "epoch": 3.77, "learning_rate": 0.00013662279610432104, "loss": 0.3539, "step": 3177 }, { "epoch": 3.77, "learning_rate": 0.00013658765730422125, "loss": 0.6074, "step": 3178 }, { "epoch": 3.77, "learning_rate": 0.00013655251328751957, "loss": 0.5322, "step": 3179 }, { "epoch": 3.77, "learning_rate": 0.00013651736405922686, "loss": 0.4176, "step": 3180 }, { "epoch": 3.78, "learning_rate": 0.00013648220962435458, "loss": 0.4878, "step": 3181 }, { "epoch": 3.78, "learning_rate": 0.000136447049987915, "loss": 0.6351, "step": 3182 }, { "epoch": 3.78, "learning_rate": 0.00013641188515492109, "loss": 0.4487, "step": 3183 }, { "epoch": 3.78, "learning_rate": 0.0001363767151303866, "loss": 0.4451, "step": 3184 }, { "epoch": 3.78, "learning_rate": 0.00013634153991932607, "loss": 0.4944, "step": 3185 }, { "epoch": 3.78, "learning_rate": 0.0001363063595267547, "loss": 0.5932, "step": 3186 }, { "epoch": 3.78, "learning_rate": 0.00013627117395768833, "loss": 0.4964, "step": 3187 }, { "epoch": 3.78, "learning_rate": 0.0001362359832171438, "loss": 0.6795, "step": 3188 }, { "epoch": 3.79, "learning_rate": 0.00013620078731013845, "loss": 0.3862, "step": 3189 }, { "epoch": 3.79, "learning_rate": 0.0001361655862416905, "loss": 0.3425, "step": 3190 }, { "epoch": 3.79, "learning_rate": 0.0001361303800168188, "loss": 0.4361, "step": 3191 }, { "epoch": 3.79, "learning_rate": 0.0001360951686405431, "loss": 0.5774, "step": 3192 }, { "epoch": 3.79, "learning_rate": 0.00013605995211788365, "loss": 0.4044, "step": 3193 }, { "epoch": 3.79, "learning_rate": 0.00013602473045386165, "loss": 0.3858, "step": 3194 }, { "epoch": 3.79, "learning_rate": 0.00013598950365349883, "loss": 0.6136, "step": 3195 }, { "epoch": 3.79, "learning_rate": 0.00013595427172181785, "loss": 0.329, "step": 3196 }, { "epoch": 3.79, "learning_rate": 0.00013591903466384203, "loss": 0.3898, "step": 3197 }, { "epoch": 3.8, "learning_rate": 0.00013588379248459536, "loss": 0.4809, "step": 3198 }, { "epoch": 3.8, "learning_rate": 0.00013584854518910262, "loss": 0.4108, "step": 3199 }, { "epoch": 3.8, "learning_rate": 0.00013581329278238927, "loss": 0.4655, "step": 3200 }, { "epoch": 3.8, "learning_rate": 0.00013577803526948162, "loss": 0.4657, "step": 3201 }, { "epoch": 3.8, "learning_rate": 0.00013574277265540654, "loss": 0.4842, "step": 3202 }, { "epoch": 3.8, "learning_rate": 0.00013570750494519175, "loss": 0.4593, "step": 3203 }, { "epoch": 3.8, "learning_rate": 0.00013567223214386564, "loss": 0.435, "step": 3204 }, { "epoch": 3.8, "learning_rate": 0.00013563695425645737, "loss": 0.7146, "step": 3205 }, { "epoch": 3.81, "learning_rate": 0.00013560167128799674, "loss": 0.5027, "step": 3206 }, { "epoch": 3.81, "learning_rate": 0.00013556638324351442, "loss": 0.4844, "step": 3207 }, { "epoch": 3.81, "learning_rate": 0.00013553109012804163, "loss": 0.7605, "step": 3208 }, { "epoch": 3.81, "learning_rate": 0.00013549579194661044, "loss": 0.396, "step": 3209 }, { "epoch": 3.81, "learning_rate": 0.00013546048870425356, "loss": 0.5178, "step": 3210 }, { "epoch": 3.81, "learning_rate": 0.00013542518040600453, "loss": 0.6946, "step": 3211 }, { "epoch": 3.81, "learning_rate": 0.0001353898670568975, "loss": 0.5054, "step": 3212 }, { "epoch": 3.81, "learning_rate": 0.00013535454866196739, "loss": 0.4495, "step": 3213 }, { "epoch": 3.82, "learning_rate": 0.00013531922522624982, "loss": 0.5138, "step": 3214 }, { "epoch": 3.82, "learning_rate": 0.0001352838967547812, "loss": 0.4706, "step": 3215 }, { "epoch": 3.82, "learning_rate": 0.00013524856325259848, "loss": 0.5193, "step": 3216 }, { "epoch": 3.82, "learning_rate": 0.0001352132247247396, "loss": 0.4436, "step": 3217 }, { "epoch": 3.82, "learning_rate": 0.00013517788117624292, "loss": 0.4139, "step": 3218 }, { "epoch": 3.82, "learning_rate": 0.0001351425326121478, "loss": 0.5937, "step": 3219 }, { "epoch": 3.82, "learning_rate": 0.000135107179037494, "loss": 0.3375, "step": 3220 }, { "epoch": 3.82, "learning_rate": 0.00013507182045732234, "loss": 0.3712, "step": 3221 }, { "epoch": 3.82, "learning_rate": 0.00013503645687667408, "loss": 0.3424, "step": 3222 }, { "epoch": 3.83, "learning_rate": 0.00013500108830059133, "loss": 0.3333, "step": 3223 }, { "epoch": 3.83, "learning_rate": 0.00013496571473411688, "loss": 0.4042, "step": 3224 }, { "epoch": 3.83, "learning_rate": 0.00013493033618229417, "loss": 0.4963, "step": 3225 }, { "epoch": 3.83, "learning_rate": 0.0001348949526501675, "loss": 0.3946, "step": 3226 }, { "epoch": 3.83, "learning_rate": 0.00013485956414278178, "loss": 0.5807, "step": 3227 }, { "epoch": 3.83, "learning_rate": 0.00013482417066518256, "loss": 0.4561, "step": 3228 }, { "epoch": 3.83, "learning_rate": 0.00013478877222241627, "loss": 0.4964, "step": 3229 }, { "epoch": 3.83, "learning_rate": 0.00013475336881952986, "loss": 0.6429, "step": 3230 }, { "epoch": 3.84, "learning_rate": 0.00013471796046157116, "loss": 0.5466, "step": 3231 }, { "epoch": 3.84, "learning_rate": 0.00013468254715358861, "loss": 0.3882, "step": 3232 }, { "epoch": 3.84, "learning_rate": 0.00013464712890063138, "loss": 0.5006, "step": 3233 }, { "epoch": 3.84, "learning_rate": 0.0001346117057077493, "loss": 0.494, "step": 3234 }, { "epoch": 3.84, "learning_rate": 0.00013457627757999303, "loss": 0.5444, "step": 3235 }, { "epoch": 3.84, "learning_rate": 0.00013454084452241372, "loss": 0.3714, "step": 3236 }, { "epoch": 3.84, "learning_rate": 0.00013450540654006348, "loss": 0.3335, "step": 3237 }, { "epoch": 3.84, "learning_rate": 0.0001344699636379949, "loss": 0.4771, "step": 3238 }, { "epoch": 3.85, "learning_rate": 0.00013443451582126144, "loss": 0.466, "step": 3239 }, { "epoch": 3.85, "learning_rate": 0.00013439906309491712, "loss": 0.5537, "step": 3240 }, { "epoch": 3.85, "learning_rate": 0.00013436360546401676, "loss": 0.5899, "step": 3241 }, { "epoch": 3.85, "learning_rate": 0.00013432814293361584, "loss": 0.443, "step": 3242 }, { "epoch": 3.85, "learning_rate": 0.00013429267550877055, "loss": 0.4238, "step": 3243 }, { "epoch": 3.85, "learning_rate": 0.00013425720319453773, "loss": 0.6529, "step": 3244 }, { "epoch": 3.85, "learning_rate": 0.00013422172599597505, "loss": 0.6163, "step": 3245 }, { "epoch": 3.85, "learning_rate": 0.00013418624391814068, "loss": 0.5183, "step": 3246 }, { "epoch": 3.85, "learning_rate": 0.00013415075696609363, "loss": 0.7659, "step": 3247 }, { "epoch": 3.86, "learning_rate": 0.0001341152651448936, "loss": 0.3717, "step": 3248 }, { "epoch": 3.86, "learning_rate": 0.0001340797684596009, "loss": 0.6885, "step": 3249 }, { "epoch": 3.86, "learning_rate": 0.0001340442669152766, "loss": 0.4483, "step": 3250 }, { "epoch": 3.86, "learning_rate": 0.0001340087605169825, "loss": 0.3417, "step": 3251 }, { "epoch": 3.86, "learning_rate": 0.00013397324926978094, "loss": 0.4751, "step": 3252 }, { "epoch": 3.86, "learning_rate": 0.00013393773317873508, "loss": 0.4448, "step": 3253 }, { "epoch": 3.86, "learning_rate": 0.00013390221224890878, "loss": 0.6278, "step": 3254 }, { "epoch": 3.86, "learning_rate": 0.00013386668648536655, "loss": 0.2995, "step": 3255 }, { "epoch": 3.87, "learning_rate": 0.00013383115589317353, "loss": 0.535, "step": 3256 }, { "epoch": 3.87, "learning_rate": 0.00013379562047739568, "loss": 0.4972, "step": 3257 }, { "epoch": 3.87, "learning_rate": 0.00013376008024309948, "loss": 0.4821, "step": 3258 }, { "epoch": 3.87, "learning_rate": 0.0001337245351953523, "loss": 0.392, "step": 3259 }, { "epoch": 3.87, "learning_rate": 0.000133688985339222, "loss": 0.413, "step": 3260 }, { "epoch": 3.87, "learning_rate": 0.00013365343067977726, "loss": 0.4689, "step": 3261 }, { "epoch": 3.87, "learning_rate": 0.00013361787122208744, "loss": 0.4737, "step": 3262 }, { "epoch": 3.87, "learning_rate": 0.00013358230697122246, "loss": 0.5033, "step": 3263 }, { "epoch": 3.88, "learning_rate": 0.00013354673793225302, "loss": 0.4901, "step": 3264 }, { "epoch": 3.88, "learning_rate": 0.00013351116411025054, "loss": 0.5776, "step": 3265 }, { "epoch": 3.88, "learning_rate": 0.00013347558551028702, "loss": 0.5005, "step": 3266 }, { "epoch": 3.88, "learning_rate": 0.00013344000213743522, "loss": 0.6475, "step": 3267 }, { "epoch": 3.88, "learning_rate": 0.00013340441399676856, "loss": 0.4394, "step": 3268 }, { "epoch": 3.88, "learning_rate": 0.0001333688210933611, "loss": 0.4351, "step": 3269 }, { "epoch": 3.88, "learning_rate": 0.0001333332234322876, "loss": 0.4526, "step": 3270 }, { "epoch": 3.88, "learning_rate": 0.0001332976210186236, "loss": 0.3006, "step": 3271 }, { "epoch": 3.88, "learning_rate": 0.00013326201385744518, "loss": 0.382, "step": 3272 }, { "epoch": 3.89, "learning_rate": 0.00013322640195382907, "loss": 0.3488, "step": 3273 }, { "epoch": 3.89, "learning_rate": 0.00013319078531285285, "loss": 0.5538, "step": 3274 }, { "epoch": 3.89, "learning_rate": 0.00013315516393959463, "loss": 0.5328, "step": 3275 }, { "epoch": 3.89, "learning_rate": 0.00013311953783913324, "loss": 0.5216, "step": 3276 }, { "epoch": 3.89, "learning_rate": 0.0001330839070165482, "loss": 0.3845, "step": 3277 }, { "epoch": 3.89, "learning_rate": 0.0001330482714769197, "loss": 0.5293, "step": 3278 }, { "epoch": 3.89, "learning_rate": 0.00013301263122532855, "loss": 0.5415, "step": 3279 }, { "epoch": 3.89, "learning_rate": 0.0001329769862668563, "loss": 0.5309, "step": 3280 }, { "epoch": 3.9, "learning_rate": 0.00013294133660658516, "loss": 0.4629, "step": 3281 }, { "epoch": 3.9, "learning_rate": 0.00013290568224959794, "loss": 0.4329, "step": 3282 }, { "epoch": 3.9, "learning_rate": 0.00013287002320097821, "loss": 0.3973, "step": 3283 }, { "epoch": 3.9, "learning_rate": 0.0001328343594658102, "loss": 0.3417, "step": 3284 }, { "epoch": 3.9, "learning_rate": 0.00013279869104917873, "loss": 0.4784, "step": 3285 }, { "epoch": 3.9, "learning_rate": 0.00013276301795616936, "loss": 0.3668, "step": 3286 }, { "epoch": 3.9, "learning_rate": 0.0001327273401918683, "loss": 0.3726, "step": 3287 }, { "epoch": 3.9, "learning_rate": 0.00013269165776136238, "loss": 0.518, "step": 3288 }, { "epoch": 3.91, "learning_rate": 0.00013265597066973922, "loss": 0.3864, "step": 3289 }, { "epoch": 3.91, "learning_rate": 0.00013262027892208694, "loss": 0.4249, "step": 3290 }, { "epoch": 3.91, "learning_rate": 0.00013258458252349444, "loss": 0.395, "step": 3291 }, { "epoch": 3.91, "learning_rate": 0.00013254888147905126, "loss": 0.8359, "step": 3292 }, { "epoch": 3.91, "learning_rate": 0.00013251317579384756, "loss": 0.5028, "step": 3293 }, { "epoch": 3.91, "learning_rate": 0.0001324774654729742, "loss": 0.4216, "step": 3294 }, { "epoch": 3.91, "learning_rate": 0.0001324417505215227, "loss": 0.6145, "step": 3295 }, { "epoch": 3.91, "learning_rate": 0.00013240603094458522, "loss": 0.6158, "step": 3296 }, { "epoch": 3.91, "learning_rate": 0.00013237030674725464, "loss": 0.5101, "step": 3297 }, { "epoch": 3.92, "learning_rate": 0.0001323345779346244, "loss": 0.6933, "step": 3298 }, { "epoch": 3.92, "learning_rate": 0.0001322988445117886, "loss": 0.4192, "step": 3299 }, { "epoch": 3.92, "learning_rate": 0.0001322631064838422, "loss": 0.4549, "step": 3300 }, { "epoch": 3.92, "learning_rate": 0.00013222736385588054, "loss": 0.4947, "step": 3301 }, { "epoch": 3.92, "learning_rate": 0.00013219161663299982, "loss": 0.5383, "step": 3302 }, { "epoch": 3.92, "learning_rate": 0.00013215586482029669, "loss": 0.4919, "step": 3303 }, { "epoch": 3.92, "learning_rate": 0.0001321201084228687, "loss": 0.4603, "step": 3304 }, { "epoch": 3.92, "learning_rate": 0.00013208434744581385, "loss": 0.3127, "step": 3305 }, { "epoch": 3.93, "learning_rate": 0.00013204858189423097, "loss": 0.754, "step": 3306 }, { "epoch": 3.93, "learning_rate": 0.00013201281177321935, "loss": 0.3746, "step": 3307 }, { "epoch": 3.93, "learning_rate": 0.00013197703708787913, "loss": 0.5576, "step": 3308 }, { "epoch": 3.93, "learning_rate": 0.0001319412578433109, "loss": 0.4992, "step": 3309 }, { "epoch": 3.93, "learning_rate": 0.00013190547404461598, "loss": 0.4533, "step": 3310 }, { "epoch": 3.93, "learning_rate": 0.0001318696856968965, "loss": 0.4155, "step": 3311 }, { "epoch": 3.93, "learning_rate": 0.00013183389280525497, "loss": 0.3661, "step": 3312 }, { "epoch": 3.93, "learning_rate": 0.00013179809537479476, "loss": 0.4512, "step": 3313 }, { "epoch": 3.94, "learning_rate": 0.00013176229341061975, "loss": 0.5895, "step": 3314 }, { "epoch": 3.94, "learning_rate": 0.00013172648691783454, "loss": 0.3308, "step": 3315 }, { "epoch": 3.94, "learning_rate": 0.00013169067590154432, "loss": 0.4128, "step": 3316 }, { "epoch": 3.94, "learning_rate": 0.00013165486036685503, "loss": 0.5432, "step": 3317 }, { "epoch": 3.94, "learning_rate": 0.0001316190403188731, "loss": 0.4297, "step": 3318 }, { "epoch": 3.94, "learning_rate": 0.00013158321576270575, "loss": 0.4259, "step": 3319 }, { "epoch": 3.94, "learning_rate": 0.0001315473867034608, "loss": 0.4428, "step": 3320 }, { "epoch": 3.94, "learning_rate": 0.0001315115531462466, "loss": 0.6495, "step": 3321 }, { "epoch": 3.94, "learning_rate": 0.00013147571509617228, "loss": 0.5706, "step": 3322 }, { "epoch": 3.95, "learning_rate": 0.0001314398725583476, "loss": 0.3647, "step": 3323 }, { "epoch": 3.95, "learning_rate": 0.0001314040255378829, "loss": 0.4864, "step": 3324 }, { "epoch": 3.95, "learning_rate": 0.00013136817403988917, "loss": 0.4197, "step": 3325 }, { "epoch": 3.95, "learning_rate": 0.00013133231806947805, "loss": 0.4818, "step": 3326 }, { "epoch": 3.95, "learning_rate": 0.00013129645763176184, "loss": 0.4201, "step": 3327 }, { "epoch": 3.95, "learning_rate": 0.0001312605927318534, "loss": 0.4352, "step": 3328 }, { "epoch": 3.95, "learning_rate": 0.0001312247233748664, "loss": 0.2785, "step": 3329 }, { "epoch": 3.95, "learning_rate": 0.0001311888495659149, "loss": 0.4424, "step": 3330 }, { "epoch": 3.96, "learning_rate": 0.00013115297131011382, "loss": 0.4258, "step": 3331 }, { "epoch": 3.96, "learning_rate": 0.00013111708861257855, "loss": 0.4332, "step": 3332 }, { "epoch": 3.96, "learning_rate": 0.00013108120147842519, "loss": 0.3578, "step": 3333 }, { "epoch": 3.96, "learning_rate": 0.0001310453099127705, "loss": 0.4219, "step": 3334 }, { "epoch": 3.96, "learning_rate": 0.0001310094139207318, "loss": 0.5837, "step": 3335 }, { "epoch": 3.96, "learning_rate": 0.0001309735135074271, "loss": 0.3965, "step": 3336 }, { "epoch": 3.96, "learning_rate": 0.00013093760867797502, "loss": 0.4764, "step": 3337 }, { "epoch": 3.96, "learning_rate": 0.00013090169943749476, "loss": 0.4933, "step": 3338 }, { "epoch": 3.97, "learning_rate": 0.00013086578579110623, "loss": 0.3434, "step": 3339 }, { "epoch": 3.97, "learning_rate": 0.0001308298677439299, "loss": 0.5931, "step": 3340 }, { "epoch": 3.97, "learning_rate": 0.00013079394530108695, "loss": 0.442, "step": 3341 }, { "epoch": 3.97, "learning_rate": 0.0001307580184676991, "loss": 0.3229, "step": 3342 }, { "epoch": 3.97, "learning_rate": 0.0001307220872488888, "loss": 0.4567, "step": 3343 }, { "epoch": 3.97, "learning_rate": 0.00013068615164977895, "loss": 0.6224, "step": 3344 }, { "epoch": 3.97, "eval_loss": 2.954587936401367, "eval_runtime": 283.9817, "eval_samples_per_second": 0.725, "eval_steps_per_second": 0.725, "step": 3344 }, { "epoch": 3.97, "learning_rate": 0.00013065021167549322, "loss": 0.6767, "step": 3345 }, { "epoch": 3.97, "learning_rate": 0.0001306142673311559, "loss": 0.4809, "step": 3346 }, { "epoch": 3.97, "learning_rate": 0.00013057831862189187, "loss": 0.4563, "step": 3347 }, { "epoch": 3.98, "learning_rate": 0.00013054236555282657, "loss": 0.4674, "step": 3348 }, { "epoch": 3.98, "learning_rate": 0.00013050640812908623, "loss": 0.6636, "step": 3349 }, { "epoch": 3.98, "learning_rate": 0.00013047044635579747, "loss": 0.4652, "step": 3350 }, { "epoch": 3.98, "learning_rate": 0.00013043448023808773, "loss": 0.3912, "step": 3351 }, { "epoch": 3.98, "learning_rate": 0.000130398509781085, "loss": 0.6064, "step": 3352 }, { "epoch": 3.98, "learning_rate": 0.00013036253498991787, "loss": 0.5975, "step": 3353 }, { "epoch": 3.98, "learning_rate": 0.00013032655586971552, "loss": 0.7249, "step": 3354 }, { "epoch": 3.98, "learning_rate": 0.00013029057242560784, "loss": 0.4604, "step": 3355 }, { "epoch": 3.99, "learning_rate": 0.00013025458466272525, "loss": 0.4895, "step": 3356 }, { "epoch": 3.99, "learning_rate": 0.0001302185925861988, "loss": 0.3628, "step": 3357 }, { "epoch": 3.99, "learning_rate": 0.00013018259620116025, "loss": 0.4798, "step": 3358 }, { "epoch": 3.99, "learning_rate": 0.00013014659551274189, "loss": 0.663, "step": 3359 }, { "epoch": 3.99, "learning_rate": 0.00013011059052607656, "loss": 0.3923, "step": 3360 }, { "epoch": 3.99, "learning_rate": 0.00013007458124629785, "loss": 0.5601, "step": 3361 }, { "epoch": 3.99, "learning_rate": 0.00013003856767853983, "loss": 0.67, "step": 3362 }, { "epoch": 3.99, "learning_rate": 0.00013000254982793735, "loss": 0.5059, "step": 3363 }, { "epoch": 4.0, "learning_rate": 0.00012996652769962566, "loss": 0.4992, "step": 3364 }, { "epoch": 4.0, "learning_rate": 0.00012993050129874082, "loss": 0.6196, "step": 3365 }, { "epoch": 4.0, "learning_rate": 0.00012989447063041935, "loss": 0.4157, "step": 3366 }, { "epoch": 4.0, "learning_rate": 0.00012985843569979848, "loss": 0.5714, "step": 3367 }, { "epoch": 4.0, "learning_rate": 0.000129822396512016, "loss": 0.7484, "step": 3368 }, { "epoch": 4.0, "learning_rate": 0.00012978635307221026, "loss": 0.3928, "step": 3369 }, { "epoch": 4.0, "learning_rate": 0.00012975030538552032, "loss": 0.4129, "step": 3370 }, { "epoch": 4.0, "learning_rate": 0.0001297142534570858, "loss": 0.5407, "step": 3371 }, { "epoch": 4.0, "learning_rate": 0.00012967819729204684, "loss": 0.479, "step": 3372 }, { "epoch": 4.01, "learning_rate": 0.00012964213689554437, "loss": 0.4492, "step": 3373 }, { "epoch": 4.01, "learning_rate": 0.00012960607227271973, "loss": 0.4574, "step": 3374 }, { "epoch": 4.01, "learning_rate": 0.00012957000342871502, "loss": 0.7554, "step": 3375 }, { "epoch": 4.01, "learning_rate": 0.00012953393036867282, "loss": 0.3038, "step": 3376 }, { "epoch": 4.01, "learning_rate": 0.0001294978530977364, "loss": 0.5125, "step": 3377 }, { "epoch": 4.01, "learning_rate": 0.0001294617716210495, "loss": 0.7192, "step": 3378 }, { "epoch": 4.01, "learning_rate": 0.00012942568594375667, "loss": 0.4371, "step": 3379 }, { "epoch": 4.01, "learning_rate": 0.00012938959607100288, "loss": 0.3672, "step": 3380 }, { "epoch": 4.0, "learning_rate": 0.00012935350200793378, "loss": 0.4752, "step": 3381 }, { "epoch": 4.0, "learning_rate": 0.0001293174037596956, "loss": 0.225, "step": 3382 }, { "epoch": 4.0, "learning_rate": 0.00012928130133143512, "loss": 0.2106, "step": 3383 }, { "epoch": 4.0, "learning_rate": 0.00012924519472829978, "loss": 0.213, "step": 3384 }, { "epoch": 4.01, "learning_rate": 0.0001292090839554376, "loss": 0.2775, "step": 3385 }, { "epoch": 4.01, "learning_rate": 0.0001291729690179972, "loss": 0.2417, "step": 3386 }, { "epoch": 4.01, "learning_rate": 0.0001291368499211278, "loss": 0.2212, "step": 3387 }, { "epoch": 4.01, "learning_rate": 0.00012910072666997912, "loss": 0.2644, "step": 3388 }, { "epoch": 4.01, "learning_rate": 0.00012906459926970162, "loss": 0.2206, "step": 3389 }, { "epoch": 4.01, "learning_rate": 0.00012902846772544624, "loss": 0.2238, "step": 3390 }, { "epoch": 4.01, "learning_rate": 0.00012899233204236455, "loss": 0.2212, "step": 3391 }, { "epoch": 4.01, "learning_rate": 0.00012895619222560878, "loss": 0.2082, "step": 3392 }, { "epoch": 4.02, "learning_rate": 0.00012892004828033156, "loss": 0.2896, "step": 3393 }, { "epoch": 4.02, "learning_rate": 0.00012888390021168636, "loss": 0.2351, "step": 3394 }, { "epoch": 4.02, "learning_rate": 0.00012884774802482697, "loss": 0.2263, "step": 3395 }, { "epoch": 4.02, "learning_rate": 0.000128811591724908, "loss": 0.2243, "step": 3396 }, { "epoch": 4.02, "learning_rate": 0.0001287754313170845, "loss": 0.2433, "step": 3397 }, { "epoch": 4.02, "learning_rate": 0.00012873926680651222, "loss": 0.2566, "step": 3398 }, { "epoch": 4.02, "learning_rate": 0.00012870309819834735, "loss": 0.2537, "step": 3399 }, { "epoch": 4.02, "learning_rate": 0.00012866692549774682, "loss": 0.298, "step": 3400 }, { "epoch": 4.03, "learning_rate": 0.000128630748709868, "loss": 0.2246, "step": 3401 }, { "epoch": 4.03, "learning_rate": 0.00012859456783986893, "loss": 0.2179, "step": 3402 }, { "epoch": 4.03, "learning_rate": 0.00012855838289290821, "loss": 0.2394, "step": 3403 }, { "epoch": 4.03, "learning_rate": 0.0001285221938741451, "loss": 0.2068, "step": 3404 }, { "epoch": 4.03, "learning_rate": 0.00012848600078873925, "loss": 0.1961, "step": 3405 }, { "epoch": 4.03, "learning_rate": 0.00012844980364185108, "loss": 0.2719, "step": 3406 }, { "epoch": 4.03, "learning_rate": 0.00012841360243864147, "loss": 0.2009, "step": 3407 }, { "epoch": 4.03, "learning_rate": 0.00012837739718427196, "loss": 0.2343, "step": 3408 }, { "epoch": 4.03, "learning_rate": 0.00012834118788390456, "loss": 0.3161, "step": 3409 }, { "epoch": 4.04, "learning_rate": 0.00012830497454270205, "loss": 0.1992, "step": 3410 }, { "epoch": 4.04, "learning_rate": 0.00012826875716582755, "loss": 0.261, "step": 3411 }, { "epoch": 4.04, "learning_rate": 0.00012823253575844495, "loss": 0.2403, "step": 3412 }, { "epoch": 4.04, "learning_rate": 0.00012819631032571854, "loss": 0.2271, "step": 3413 }, { "epoch": 4.04, "learning_rate": 0.00012816008087281332, "loss": 0.2062, "step": 3414 }, { "epoch": 4.04, "learning_rate": 0.00012812384740489485, "loss": 0.2133, "step": 3415 }, { "epoch": 4.04, "learning_rate": 0.00012808760992712924, "loss": 0.2372, "step": 3416 }, { "epoch": 4.04, "learning_rate": 0.00012805136844468309, "loss": 0.2466, "step": 3417 }, { "epoch": 4.05, "learning_rate": 0.00012801512296272368, "loss": 0.2456, "step": 3418 }, { "epoch": 4.05, "learning_rate": 0.00012797887348641883, "loss": 0.2171, "step": 3419 }, { "epoch": 4.05, "learning_rate": 0.00012794262002093697, "loss": 0.3038, "step": 3420 }, { "epoch": 4.05, "learning_rate": 0.000127906362571447, "loss": 0.1868, "step": 3421 }, { "epoch": 4.05, "learning_rate": 0.00012787010114311844, "loss": 0.2611, "step": 3422 }, { "epoch": 4.05, "learning_rate": 0.00012783383574112138, "loss": 0.2131, "step": 3423 }, { "epoch": 4.05, "learning_rate": 0.0001277975663706265, "loss": 0.2005, "step": 3424 }, { "epoch": 4.05, "learning_rate": 0.000127761293036805, "loss": 0.2455, "step": 3425 }, { "epoch": 4.06, "learning_rate": 0.0001277250157448287, "loss": 0.2837, "step": 3426 }, { "epoch": 4.06, "learning_rate": 0.00012768873449986988, "loss": 0.2252, "step": 3427 }, { "epoch": 4.06, "learning_rate": 0.00012765244930710155, "loss": 0.211, "step": 3428 }, { "epoch": 4.06, "learning_rate": 0.00012761616017169708, "loss": 0.1831, "step": 3429 }, { "epoch": 4.06, "learning_rate": 0.0001275798670988306, "loss": 0.1985, "step": 3430 }, { "epoch": 4.06, "learning_rate": 0.00012754357009367665, "loss": 0.2341, "step": 3431 }, { "epoch": 4.06, "learning_rate": 0.00012750726916141046, "loss": 0.2395, "step": 3432 }, { "epoch": 4.06, "learning_rate": 0.00012747096430720765, "loss": 0.2183, "step": 3433 }, { "epoch": 4.06, "learning_rate": 0.0001274346555362446, "loss": 0.2698, "step": 3434 }, { "epoch": 4.07, "learning_rate": 0.00012739834285369807, "loss": 0.2104, "step": 3435 }, { "epoch": 4.07, "learning_rate": 0.0001273620262647455, "loss": 0.2395, "step": 3436 }, { "epoch": 4.07, "learning_rate": 0.00012732570577456484, "loss": 0.2218, "step": 3437 }, { "epoch": 4.07, "learning_rate": 0.00012728938138833462, "loss": 0.2337, "step": 3438 }, { "epoch": 4.07, "learning_rate": 0.00012725305311123386, "loss": 0.1958, "step": 3439 }, { "epoch": 4.07, "learning_rate": 0.0001272167209484422, "loss": 0.2767, "step": 3440 }, { "epoch": 4.07, "learning_rate": 0.00012718038490513984, "loss": 0.2238, "step": 3441 }, { "epoch": 4.07, "learning_rate": 0.00012714404498650743, "loss": 0.2931, "step": 3442 }, { "epoch": 4.08, "learning_rate": 0.00012710770119772632, "loss": 0.3166, "step": 3443 }, { "epoch": 4.08, "learning_rate": 0.00012707135354397836, "loss": 0.1985, "step": 3444 }, { "epoch": 4.08, "learning_rate": 0.00012703500203044586, "loss": 0.2208, "step": 3445 }, { "epoch": 4.08, "learning_rate": 0.0001269986466623118, "loss": 0.2279, "step": 3446 }, { "epoch": 4.08, "learning_rate": 0.00012696228744475964, "loss": 0.2656, "step": 3447 }, { "epoch": 4.08, "learning_rate": 0.00012692592438297341, "loss": 0.2181, "step": 3448 }, { "epoch": 4.08, "learning_rate": 0.00012688955748213772, "loss": 0.1994, "step": 3449 }, { "epoch": 4.08, "learning_rate": 0.0001268531867474377, "loss": 0.2818, "step": 3450 }, { "epoch": 4.09, "learning_rate": 0.00012681681218405897, "loss": 0.2277, "step": 3451 }, { "epoch": 4.09, "learning_rate": 0.00012678043379718782, "loss": 0.2692, "step": 3452 }, { "epoch": 4.09, "learning_rate": 0.00012674405159201091, "loss": 0.2664, "step": 3453 }, { "epoch": 4.09, "learning_rate": 0.00012670766557371565, "loss": 0.2008, "step": 3454 }, { "epoch": 4.09, "learning_rate": 0.00012667127574748986, "loss": 0.2382, "step": 3455 }, { "epoch": 4.09, "learning_rate": 0.0001266348821185219, "loss": 0.2454, "step": 3456 }, { "epoch": 4.09, "learning_rate": 0.0001265984846920008, "loss": 0.3547, "step": 3457 }, { "epoch": 4.09, "learning_rate": 0.00012656208347311594, "loss": 0.2115, "step": 3458 }, { "epoch": 4.09, "learning_rate": 0.00012652567846705736, "loss": 0.1929, "step": 3459 }, { "epoch": 4.1, "learning_rate": 0.00012648926967901567, "loss": 0.2076, "step": 3460 }, { "epoch": 4.1, "learning_rate": 0.00012645285711418194, "loss": 0.2045, "step": 3461 }, { "epoch": 4.1, "learning_rate": 0.00012641644077774776, "loss": 0.2378, "step": 3462 }, { "epoch": 4.1, "learning_rate": 0.0001263800206749054, "loss": 0.2674, "step": 3463 }, { "epoch": 4.1, "learning_rate": 0.00012634359681084752, "loss": 0.2125, "step": 3464 }, { "epoch": 4.1, "learning_rate": 0.00012630716919076736, "loss": 0.2097, "step": 3465 }, { "epoch": 4.1, "learning_rate": 0.0001262707378198587, "loss": 0.2352, "step": 3466 }, { "epoch": 4.1, "learning_rate": 0.0001262343027033159, "loss": 0.2105, "step": 3467 }, { "epoch": 4.11, "learning_rate": 0.00012619786384633375, "loss": 0.2207, "step": 3468 }, { "epoch": 4.11, "learning_rate": 0.0001261614212541077, "loss": 0.304, "step": 3469 }, { "epoch": 4.11, "learning_rate": 0.00012612497493183364, "loss": 0.2239, "step": 3470 }, { "epoch": 4.11, "learning_rate": 0.00012608852488470802, "loss": 0.2875, "step": 3471 }, { "epoch": 4.11, "learning_rate": 0.0001260520711179278, "loss": 0.3197, "step": 3472 }, { "epoch": 4.11, "learning_rate": 0.00012601561363669058, "loss": 0.1942, "step": 3473 }, { "epoch": 4.11, "learning_rate": 0.00012597915244619428, "loss": 0.2117, "step": 3474 }, { "epoch": 4.11, "learning_rate": 0.00012594268755163754, "loss": 0.2222, "step": 3475 }, { "epoch": 4.12, "learning_rate": 0.00012590621895821943, "loss": 0.1871, "step": 3476 }, { "epoch": 4.12, "learning_rate": 0.0001258697466711396, "loss": 0.2146, "step": 3477 }, { "epoch": 4.12, "learning_rate": 0.0001258332706955982, "loss": 0.3307, "step": 3478 }, { "epoch": 4.12, "learning_rate": 0.00012579679103679592, "loss": 0.2175, "step": 3479 }, { "epoch": 4.12, "learning_rate": 0.00012576030769993393, "loss": 0.2976, "step": 3480 }, { "epoch": 4.12, "learning_rate": 0.000125723820690214, "loss": 0.2031, "step": 3481 }, { "epoch": 4.12, "learning_rate": 0.00012568733001283827, "loss": 0.2046, "step": 3482 }, { "epoch": 4.12, "learning_rate": 0.0001256508356730097, "loss": 0.2642, "step": 3483 }, { "epoch": 4.12, "learning_rate": 0.00012561433767593145, "loss": 0.2088, "step": 3484 }, { "epoch": 4.13, "learning_rate": 0.0001255778360268074, "loss": 0.2458, "step": 3485 }, { "epoch": 4.13, "learning_rate": 0.0001255413307308418, "loss": 0.2237, "step": 3486 }, { "epoch": 4.13, "learning_rate": 0.00012550482179323963, "loss": 0.2696, "step": 3487 }, { "epoch": 4.13, "learning_rate": 0.00012546830921920617, "loss": 0.2078, "step": 3488 }, { "epoch": 4.13, "learning_rate": 0.00012543179301394744, "loss": 0.2199, "step": 3489 }, { "epoch": 4.13, "learning_rate": 0.0001253952731826697, "loss": 0.2258, "step": 3490 }, { "epoch": 4.13, "learning_rate": 0.00012535874973057997, "loss": 0.1981, "step": 3491 }, { "epoch": 4.13, "learning_rate": 0.0001253222226628857, "loss": 0.3252, "step": 3492 }, { "epoch": 4.14, "learning_rate": 0.00012528569198479481, "loss": 0.2717, "step": 3493 }, { "epoch": 4.14, "learning_rate": 0.0001252491577015158, "loss": 0.248, "step": 3494 }, { "epoch": 4.14, "learning_rate": 0.00012521261981825768, "loss": 0.2725, "step": 3495 }, { "epoch": 4.14, "learning_rate": 0.00012517607834022993, "loss": 0.2203, "step": 3496 }, { "epoch": 4.14, "learning_rate": 0.0001251395332726426, "loss": 0.2461, "step": 3497 }, { "epoch": 4.14, "learning_rate": 0.00012510298462070619, "loss": 0.3018, "step": 3498 }, { "epoch": 4.14, "learning_rate": 0.0001250664323896317, "loss": 0.2329, "step": 3499 }, { "epoch": 4.14, "learning_rate": 0.00012502987658463075, "loss": 0.221, "step": 3500 }, { "epoch": 4.15, "learning_rate": 0.00012499331721091544, "loss": 0.2812, "step": 3501 }, { "epoch": 4.15, "learning_rate": 0.00012495675427369823, "loss": 0.2846, "step": 3502 }, { "epoch": 4.15, "learning_rate": 0.00012492018777819226, "loss": 0.2447, "step": 3503 }, { "epoch": 4.15, "learning_rate": 0.0001248836177296111, "loss": 0.1969, "step": 3504 }, { "epoch": 4.15, "learning_rate": 0.00012484704413316878, "loss": 0.2045, "step": 3505 }, { "epoch": 4.15, "learning_rate": 0.00012481046699408004, "loss": 0.1862, "step": 3506 }, { "epoch": 4.15, "learning_rate": 0.00012477388631755985, "loss": 0.23, "step": 3507 }, { "epoch": 4.15, "learning_rate": 0.0001247373021088239, "loss": 0.2972, "step": 3508 }, { "epoch": 4.15, "learning_rate": 0.00012470071437308827, "loss": 0.2222, "step": 3509 }, { "epoch": 4.16, "learning_rate": 0.00012466412311556952, "loss": 0.2262, "step": 3510 }, { "epoch": 4.16, "learning_rate": 0.00012462752834148486, "loss": 0.3642, "step": 3511 }, { "epoch": 4.16, "learning_rate": 0.0001245909300560518, "loss": 0.2221, "step": 3512 }, { "epoch": 4.16, "learning_rate": 0.00012455432826448862, "loss": 0.2607, "step": 3513 }, { "epoch": 4.16, "learning_rate": 0.00012451772297201376, "loss": 0.2396, "step": 3514 }, { "epoch": 4.16, "learning_rate": 0.00012448111418384645, "loss": 0.2034, "step": 3515 }, { "epoch": 4.16, "learning_rate": 0.00012444450190520623, "loss": 0.2404, "step": 3516 }, { "epoch": 4.16, "learning_rate": 0.00012440788614131329, "loss": 0.2029, "step": 3517 }, { "epoch": 4.17, "learning_rate": 0.00012437126689738816, "loss": 0.2128, "step": 3518 }, { "epoch": 4.17, "learning_rate": 0.00012433464417865202, "loss": 0.2857, "step": 3519 }, { "epoch": 4.17, "learning_rate": 0.0001242980179903264, "loss": 0.2931, "step": 3520 }, { "epoch": 4.17, "learning_rate": 0.00012426138833763342, "loss": 0.2319, "step": 3521 }, { "epoch": 4.17, "learning_rate": 0.00012422475522579573, "loss": 0.2272, "step": 3522 }, { "epoch": 4.17, "learning_rate": 0.00012418811866003632, "loss": 0.2498, "step": 3523 }, { "epoch": 4.17, "learning_rate": 0.00012415147864557884, "loss": 0.1993, "step": 3524 }, { "epoch": 4.17, "learning_rate": 0.0001241148351876473, "loss": 0.2329, "step": 3525 }, { "epoch": 4.18, "learning_rate": 0.0001240781882914663, "loss": 0.2228, "step": 3526 }, { "epoch": 4.18, "learning_rate": 0.00012404153796226087, "loss": 0.2228, "step": 3527 }, { "epoch": 4.18, "learning_rate": 0.00012400488420525653, "loss": 0.2277, "step": 3528 }, { "epoch": 4.18, "learning_rate": 0.0001239682270256793, "loss": 0.2344, "step": 3529 }, { "epoch": 4.18, "learning_rate": 0.0001239315664287558, "loss": 0.2043, "step": 3530 }, { "epoch": 4.18, "learning_rate": 0.0001238949024197129, "loss": 0.2143, "step": 3531 }, { "epoch": 4.18, "learning_rate": 0.00012385823500377812, "loss": 0.2054, "step": 3532 }, { "epoch": 4.18, "learning_rate": 0.00012382156418617947, "loss": 0.2191, "step": 3533 }, { "epoch": 4.18, "learning_rate": 0.0001237848899721454, "loss": 0.2199, "step": 3534 }, { "epoch": 4.19, "learning_rate": 0.00012374821236690482, "loss": 0.1899, "step": 3535 }, { "epoch": 4.19, "learning_rate": 0.0001237115313756872, "loss": 0.2206, "step": 3536 }, { "epoch": 4.19, "learning_rate": 0.00012367484700372242, "loss": 0.2107, "step": 3537 }, { "epoch": 4.19, "learning_rate": 0.00012363815925624087, "loss": 0.1904, "step": 3538 }, { "epoch": 4.19, "learning_rate": 0.00012360146813847345, "loss": 0.2259, "step": 3539 }, { "epoch": 4.19, "learning_rate": 0.00012356477365565148, "loss": 0.2488, "step": 3540 }, { "epoch": 4.19, "learning_rate": 0.00012352807581300678, "loss": 0.3026, "step": 3541 }, { "epoch": 4.19, "learning_rate": 0.00012349137461577174, "loss": 0.2141, "step": 3542 }, { "epoch": 4.2, "learning_rate": 0.00012345467006917907, "loss": 0.2183, "step": 3543 }, { "epoch": 4.2, "learning_rate": 0.00012341796217846208, "loss": 0.2978, "step": 3544 }, { "epoch": 4.2, "learning_rate": 0.0001233812509488545, "loss": 0.2255, "step": 3545 }, { "epoch": 4.2, "learning_rate": 0.00012334453638559057, "loss": 0.2209, "step": 3546 }, { "epoch": 4.2, "learning_rate": 0.00012330781849390494, "loss": 0.2464, "step": 3547 }, { "epoch": 4.2, "learning_rate": 0.00012327109727903283, "loss": 0.2259, "step": 3548 }, { "epoch": 4.2, "learning_rate": 0.00012323437274620983, "loss": 0.209, "step": 3549 }, { "epoch": 4.2, "learning_rate": 0.0001231976449006721, "loss": 0.2424, "step": 3550 }, { "epoch": 4.21, "learning_rate": 0.00012316091374765624, "loss": 0.2162, "step": 3551 }, { "epoch": 4.21, "learning_rate": 0.0001231241792923993, "loss": 0.2442, "step": 3552 }, { "epoch": 4.21, "learning_rate": 0.00012308744154013878, "loss": 0.2061, "step": 3553 }, { "epoch": 4.21, "eval_loss": 3.390720844268799, "eval_runtime": 283.8935, "eval_samples_per_second": 0.726, "eval_steps_per_second": 0.726, "step": 3553 }, { "epoch": 4.21, "learning_rate": 0.00012305070049611273, "loss": 0.1838, "step": 3554 }, { "epoch": 4.21, "learning_rate": 0.00012301395616555957, "loss": 0.197, "step": 3555 }, { "epoch": 4.21, "learning_rate": 0.0001229772085537183, "loss": 0.2479, "step": 3556 }, { "epoch": 4.21, "learning_rate": 0.00012294045766582823, "loss": 0.3272, "step": 3557 }, { "epoch": 4.21, "learning_rate": 0.00012290370350712937, "loss": 0.2301, "step": 3558 }, { "epoch": 4.21, "learning_rate": 0.00012286694608286197, "loss": 0.2367, "step": 3559 }, { "epoch": 4.22, "learning_rate": 0.00012283018539826685, "loss": 0.2419, "step": 3560 }, { "epoch": 4.22, "learning_rate": 0.0001227934214585853, "loss": 0.2605, "step": 3561 }, { "epoch": 4.22, "learning_rate": 0.000122756654269059, "loss": 0.2084, "step": 3562 }, { "epoch": 4.22, "learning_rate": 0.00012271988383493024, "loss": 0.2414, "step": 3563 }, { "epoch": 4.22, "learning_rate": 0.00012268311016144163, "loss": 0.2206, "step": 3564 }, { "epoch": 4.22, "learning_rate": 0.0001226463332538363, "loss": 0.2012, "step": 3565 }, { "epoch": 4.22, "learning_rate": 0.00012260955311735786, "loss": 0.1884, "step": 3566 }, { "epoch": 4.22, "learning_rate": 0.00012257276975725028, "loss": 0.2155, "step": 3567 }, { "epoch": 4.23, "learning_rate": 0.0001225359831787581, "loss": 0.2375, "step": 3568 }, { "epoch": 4.23, "learning_rate": 0.00012249919338712636, "loss": 0.2713, "step": 3569 }, { "epoch": 4.23, "learning_rate": 0.00012246240038760043, "loss": 0.2414, "step": 3570 }, { "epoch": 4.23, "learning_rate": 0.00012242560418542612, "loss": 0.2209, "step": 3571 }, { "epoch": 4.23, "learning_rate": 0.00012238880478584985, "loss": 0.2318, "step": 3572 }, { "epoch": 4.23, "learning_rate": 0.00012235200219411836, "loss": 0.2858, "step": 3573 }, { "epoch": 4.23, "learning_rate": 0.000122315196415479, "loss": 0.2468, "step": 3574 }, { "epoch": 4.23, "learning_rate": 0.00012227838745517932, "loss": 0.2166, "step": 3575 }, { "epoch": 4.24, "learning_rate": 0.0001222415753184676, "loss": 0.2349, "step": 3576 }, { "epoch": 4.24, "learning_rate": 0.00012220476001059238, "loss": 0.2486, "step": 3577 }, { "epoch": 4.24, "learning_rate": 0.00012216794153680274, "loss": 0.234, "step": 3578 }, { "epoch": 4.24, "learning_rate": 0.00012213111990234815, "loss": 0.2008, "step": 3579 }, { "epoch": 4.24, "learning_rate": 0.00012209429511247864, "loss": 0.2548, "step": 3580 }, { "epoch": 4.24, "learning_rate": 0.0001220574671724446, "loss": 0.2562, "step": 3581 }, { "epoch": 4.24, "learning_rate": 0.0001220206360874969, "loss": 0.2586, "step": 3582 }, { "epoch": 4.24, "learning_rate": 0.0001219838018628868, "loss": 0.2428, "step": 3583 }, { "epoch": 4.24, "learning_rate": 0.00012194696450386608, "loss": 0.2159, "step": 3584 }, { "epoch": 4.25, "learning_rate": 0.00012191012401568698, "loss": 0.2544, "step": 3585 }, { "epoch": 4.25, "learning_rate": 0.0001218732804036021, "loss": 0.2396, "step": 3586 }, { "epoch": 4.25, "learning_rate": 0.00012183643367286462, "loss": 0.2335, "step": 3587 }, { "epoch": 4.25, "learning_rate": 0.00012179958382872796, "loss": 0.2275, "step": 3588 }, { "epoch": 4.25, "learning_rate": 0.00012176273087644619, "loss": 0.2291, "step": 3589 }, { "epoch": 4.25, "learning_rate": 0.0001217258748212737, "loss": 0.2272, "step": 3590 }, { "epoch": 4.25, "learning_rate": 0.00012168901566846535, "loss": 0.2135, "step": 3591 }, { "epoch": 4.25, "learning_rate": 0.00012165215342327648, "loss": 0.22, "step": 3592 }, { "epoch": 4.26, "learning_rate": 0.00012161528809096285, "loss": 0.2577, "step": 3593 }, { "epoch": 4.26, "learning_rate": 0.00012157841967678063, "loss": 0.2006, "step": 3594 }, { "epoch": 4.26, "learning_rate": 0.00012154154818598647, "loss": 0.2322, "step": 3595 }, { "epoch": 4.26, "learning_rate": 0.0001215046736238374, "loss": 0.196, "step": 3596 }, { "epoch": 4.26, "learning_rate": 0.00012146779599559095, "loss": 0.2267, "step": 3597 }, { "epoch": 4.26, "learning_rate": 0.00012143091530650508, "loss": 0.2416, "step": 3598 }, { "epoch": 4.26, "learning_rate": 0.00012139403156183817, "loss": 0.2585, "step": 3599 }, { "epoch": 4.26, "learning_rate": 0.00012135714476684903, "loss": 0.2644, "step": 3600 }, { "epoch": 4.27, "learning_rate": 0.00012132025492679693, "loss": 0.2355, "step": 3601 }, { "epoch": 4.27, "learning_rate": 0.00012128336204694148, "loss": 0.2363, "step": 3602 }, { "epoch": 4.27, "learning_rate": 0.00012124646613254291, "loss": 0.2476, "step": 3603 }, { "epoch": 4.27, "learning_rate": 0.0001212095671888617, "loss": 0.2185, "step": 3604 }, { "epoch": 4.27, "learning_rate": 0.00012117266522115889, "loss": 0.2233, "step": 3605 }, { "epoch": 4.27, "learning_rate": 0.00012113576023469582, "loss": 0.2084, "step": 3606 }, { "epoch": 4.27, "learning_rate": 0.00012109885223473439, "loss": 0.2439, "step": 3607 }, { "epoch": 4.27, "learning_rate": 0.00012106194122653684, "loss": 0.2409, "step": 3608 }, { "epoch": 4.27, "learning_rate": 0.00012102502721536595, "loss": 0.2183, "step": 3609 }, { "epoch": 4.28, "learning_rate": 0.00012098811020648475, "loss": 0.2595, "step": 3610 }, { "epoch": 4.28, "learning_rate": 0.00012095119020515691, "loss": 0.2135, "step": 3611 }, { "epoch": 4.28, "learning_rate": 0.0001209142672166463, "loss": 0.2125, "step": 3612 }, { "epoch": 4.28, "learning_rate": 0.00012087734124621742, "loss": 0.2017, "step": 3613 }, { "epoch": 4.28, "learning_rate": 0.00012084041229913505, "loss": 0.2163, "step": 3614 }, { "epoch": 4.28, "learning_rate": 0.00012080348038066452, "loss": 0.2198, "step": 3615 }, { "epoch": 4.28, "learning_rate": 0.00012076654549607145, "loss": 0.2234, "step": 3616 }, { "epoch": 4.28, "learning_rate": 0.00012072960765062197, "loss": 0.2201, "step": 3617 }, { "epoch": 4.29, "learning_rate": 0.00012069266684958265, "loss": 0.245, "step": 3618 }, { "epoch": 4.29, "learning_rate": 0.00012065572309822037, "loss": 0.2067, "step": 3619 }, { "epoch": 4.29, "learning_rate": 0.00012061877640180255, "loss": 0.2284, "step": 3620 }, { "epoch": 4.29, "learning_rate": 0.000120581826765597, "loss": 0.2323, "step": 3621 }, { "epoch": 4.29, "learning_rate": 0.00012054487419487188, "loss": 0.2162, "step": 3622 }, { "epoch": 4.29, "learning_rate": 0.00012050791869489586, "loss": 0.2131, "step": 3623 }, { "epoch": 4.29, "learning_rate": 0.00012047096027093798, "loss": 0.2168, "step": 3624 }, { "epoch": 4.29, "learning_rate": 0.00012043399892826768, "loss": 0.2293, "step": 3625 }, { "epoch": 4.3, "learning_rate": 0.00012039703467215488, "loss": 0.2202, "step": 3626 }, { "epoch": 4.3, "learning_rate": 0.00012036006750786985, "loss": 0.2288, "step": 3627 }, { "epoch": 4.3, "learning_rate": 0.00012032309744068334, "loss": 0.2606, "step": 3628 }, { "epoch": 4.3, "learning_rate": 0.00012028612447586643, "loss": 0.2754, "step": 3629 }, { "epoch": 4.3, "learning_rate": 0.00012024914861869063, "loss": 0.239, "step": 3630 }, { "epoch": 4.3, "learning_rate": 0.00012021216987442798, "loss": 0.2312, "step": 3631 }, { "epoch": 4.3, "learning_rate": 0.00012017518824835077, "loss": 0.2299, "step": 3632 }, { "epoch": 4.3, "learning_rate": 0.00012013820374573184, "loss": 0.2214, "step": 3633 }, { "epoch": 4.3, "learning_rate": 0.00012010121637184428, "loss": 0.2492, "step": 3634 }, { "epoch": 4.31, "learning_rate": 0.00012006422613196178, "loss": 0.2659, "step": 3635 }, { "epoch": 4.31, "learning_rate": 0.00012002723303135826, "loss": 0.23, "step": 3636 }, { "epoch": 4.31, "learning_rate": 0.00011999023707530819, "loss": 0.287, "step": 3637 }, { "epoch": 4.31, "learning_rate": 0.00011995323826908635, "loss": 0.2204, "step": 3638 }, { "epoch": 4.31, "learning_rate": 0.00011991623661796798, "loss": 0.2277, "step": 3639 }, { "epoch": 4.31, "learning_rate": 0.00011987923212722872, "loss": 0.2436, "step": 3640 }, { "epoch": 4.31, "learning_rate": 0.00011984222480214456, "loss": 0.2074, "step": 3641 }, { "epoch": 4.31, "learning_rate": 0.00011980521464799198, "loss": 0.2212, "step": 3642 }, { "epoch": 4.32, "learning_rate": 0.00011976820167004779, "loss": 0.2147, "step": 3643 }, { "epoch": 4.32, "learning_rate": 0.00011973118587358928, "loss": 0.2271, "step": 3644 }, { "epoch": 4.32, "learning_rate": 0.00011969416726389404, "loss": 0.2498, "step": 3645 }, { "epoch": 4.32, "learning_rate": 0.00011965714584624012, "loss": 0.2171, "step": 3646 }, { "epoch": 4.32, "learning_rate": 0.00011962012162590601, "loss": 0.2276, "step": 3647 }, { "epoch": 4.32, "learning_rate": 0.00011958309460817052, "loss": 0.2089, "step": 3648 }, { "epoch": 4.32, "learning_rate": 0.00011954606479831291, "loss": 0.2691, "step": 3649 }, { "epoch": 4.32, "learning_rate": 0.00011950903220161285, "loss": 0.2229, "step": 3650 }, { "epoch": 4.33, "learning_rate": 0.00011947199682335031, "loss": 0.2315, "step": 3651 }, { "epoch": 4.33, "learning_rate": 0.0001194349586688058, "loss": 0.2208, "step": 3652 }, { "epoch": 4.33, "learning_rate": 0.0001193979177432601, "loss": 0.2159, "step": 3653 }, { "epoch": 4.33, "learning_rate": 0.00011936087405199446, "loss": 0.2781, "step": 3654 }, { "epoch": 4.33, "learning_rate": 0.00011932382760029049, "loss": 0.2142, "step": 3655 }, { "epoch": 4.33, "learning_rate": 0.00011928677839343026, "loss": 0.2275, "step": 3656 }, { "epoch": 4.33, "learning_rate": 0.0001192497264366961, "loss": 0.2718, "step": 3657 }, { "epoch": 4.33, "learning_rate": 0.00011921267173537086, "loss": 0.1947, "step": 3658 }, { "epoch": 4.33, "learning_rate": 0.00011917561429473771, "loss": 0.2361, "step": 3659 }, { "epoch": 4.34, "learning_rate": 0.00011913855412008023, "loss": 0.1999, "step": 3660 }, { "epoch": 4.34, "learning_rate": 0.00011910149121668241, "loss": 0.2199, "step": 3661 }, { "epoch": 4.34, "learning_rate": 0.00011906442558982865, "loss": 0.2217, "step": 3662 }, { "epoch": 4.34, "learning_rate": 0.0001190273572448036, "loss": 0.2263, "step": 3663 }, { "epoch": 4.34, "learning_rate": 0.00011899028618689247, "loss": 0.2216, "step": 3664 }, { "epoch": 4.34, "learning_rate": 0.00011895321242138075, "loss": 0.2298, "step": 3665 }, { "epoch": 4.34, "learning_rate": 0.0001189161359535544, "loss": 0.2332, "step": 3666 }, { "epoch": 4.34, "learning_rate": 0.00011887905678869966, "loss": 0.2955, "step": 3667 }, { "epoch": 4.35, "learning_rate": 0.00011884197493210328, "loss": 0.2352, "step": 3668 }, { "epoch": 4.35, "learning_rate": 0.00011880489038905223, "loss": 0.2104, "step": 3669 }, { "epoch": 4.35, "learning_rate": 0.00011876780316483401, "loss": 0.2897, "step": 3670 }, { "epoch": 4.35, "learning_rate": 0.00011873071326473644, "loss": 0.2041, "step": 3671 }, { "epoch": 4.35, "learning_rate": 0.00011869362069404775, "loss": 0.2242, "step": 3672 }, { "epoch": 4.35, "learning_rate": 0.0001186565254580565, "loss": 0.2015, "step": 3673 }, { "epoch": 4.35, "learning_rate": 0.00011861942756205169, "loss": 0.2716, "step": 3674 }, { "epoch": 4.35, "learning_rate": 0.00011858232701132264, "loss": 0.2504, "step": 3675 }, { "epoch": 4.36, "learning_rate": 0.00011854522381115908, "loss": 0.1846, "step": 3676 }, { "epoch": 4.36, "learning_rate": 0.00011850811796685117, "loss": 0.207, "step": 3677 }, { "epoch": 4.36, "learning_rate": 0.00011847100948368937, "loss": 0.2228, "step": 3678 }, { "epoch": 4.36, "learning_rate": 0.00011843389836696447, "loss": 0.2365, "step": 3679 }, { "epoch": 4.36, "learning_rate": 0.00011839678462196784, "loss": 0.2159, "step": 3680 }, { "epoch": 4.36, "learning_rate": 0.00011835966825399096, "loss": 0.2413, "step": 3681 }, { "epoch": 4.36, "learning_rate": 0.00011832254926832586, "loss": 0.2596, "step": 3682 }, { "epoch": 4.36, "learning_rate": 0.00011828542767026493, "loss": 0.2041, "step": 3683 }, { "epoch": 4.36, "learning_rate": 0.00011824830346510089, "loss": 0.2512, "step": 3684 }, { "epoch": 4.37, "learning_rate": 0.00011821117665812682, "loss": 0.2165, "step": 3685 }, { "epoch": 4.37, "learning_rate": 0.00011817404725463618, "loss": 0.2125, "step": 3686 }, { "epoch": 4.37, "learning_rate": 0.00011813691525992286, "loss": 0.2557, "step": 3687 }, { "epoch": 4.37, "learning_rate": 0.00011809978067928102, "loss": 0.2088, "step": 3688 }, { "epoch": 4.37, "learning_rate": 0.00011806264351800526, "loss": 0.2093, "step": 3689 }, { "epoch": 4.37, "learning_rate": 0.0001180255037813906, "loss": 0.2217, "step": 3690 }, { "epoch": 4.37, "learning_rate": 0.00011798836147473225, "loss": 0.2681, "step": 3691 }, { "epoch": 4.37, "learning_rate": 0.00011795121660332593, "loss": 0.2257, "step": 3692 }, { "epoch": 4.38, "learning_rate": 0.0001179140691724677, "loss": 0.2422, "step": 3693 }, { "epoch": 4.38, "learning_rate": 0.00011787691918745396, "loss": 0.3328, "step": 3694 }, { "epoch": 4.38, "learning_rate": 0.0001178397666535815, "loss": 0.233, "step": 3695 }, { "epoch": 4.38, "learning_rate": 0.00011780261157614747, "loss": 0.243, "step": 3696 }, { "epoch": 4.38, "learning_rate": 0.00011776545396044936, "loss": 0.2089, "step": 3697 }, { "epoch": 4.38, "learning_rate": 0.00011772829381178502, "loss": 0.2143, "step": 3698 }, { "epoch": 4.38, "learning_rate": 0.00011769113113545267, "loss": 0.2135, "step": 3699 }, { "epoch": 4.38, "learning_rate": 0.00011765396593675097, "loss": 0.2403, "step": 3700 }, { "epoch": 4.39, "learning_rate": 0.00011761679822097877, "loss": 0.2182, "step": 3701 }, { "epoch": 4.39, "learning_rate": 0.00011757962799343547, "loss": 0.2159, "step": 3702 }, { "epoch": 4.39, "learning_rate": 0.00011754245525942065, "loss": 0.2098, "step": 3703 }, { "epoch": 4.39, "learning_rate": 0.00011750528002423437, "loss": 0.2264, "step": 3704 }, { "epoch": 4.39, "learning_rate": 0.000117468102293177, "loss": 0.2023, "step": 3705 }, { "epoch": 4.39, "learning_rate": 0.00011743092207154929, "loss": 0.2978, "step": 3706 }, { "epoch": 4.39, "learning_rate": 0.0001173937393646523, "loss": 0.2311, "step": 3707 }, { "epoch": 4.39, "learning_rate": 0.0001173565541777875, "loss": 0.244, "step": 3708 }, { "epoch": 4.39, "learning_rate": 0.00011731936651625668, "loss": 0.2058, "step": 3709 }, { "epoch": 4.4, "learning_rate": 0.00011728217638536197, "loss": 0.3039, "step": 3710 }, { "epoch": 4.4, "learning_rate": 0.00011724498379040587, "loss": 0.2142, "step": 3711 }, { "epoch": 4.4, "learning_rate": 0.0001172077887366913, "loss": 0.2262, "step": 3712 }, { "epoch": 4.4, "learning_rate": 0.00011717059122952136, "loss": 0.2304, "step": 3713 }, { "epoch": 4.4, "learning_rate": 0.00011713339127419969, "loss": 0.2093, "step": 3714 }, { "epoch": 4.4, "learning_rate": 0.00011709618887603014, "loss": 0.2083, "step": 3715 }, { "epoch": 4.4, "learning_rate": 0.00011705898404031697, "loss": 0.3559, "step": 3716 }, { "epoch": 4.4, "learning_rate": 0.00011702177677236479, "loss": 0.2728, "step": 3717 }, { "epoch": 4.41, "learning_rate": 0.00011698456707747854, "loss": 0.246, "step": 3718 }, { "epoch": 4.41, "learning_rate": 0.00011694735496096354, "loss": 0.2031, "step": 3719 }, { "epoch": 4.41, "learning_rate": 0.00011691014042812536, "loss": 0.2049, "step": 3720 }, { "epoch": 4.41, "learning_rate": 0.00011687292348427004, "loss": 0.248, "step": 3721 }, { "epoch": 4.41, "learning_rate": 0.00011683570413470383, "loss": 0.2189, "step": 3722 }, { "epoch": 4.41, "learning_rate": 0.00011679848238473352, "loss": 0.2302, "step": 3723 }, { "epoch": 4.41, "learning_rate": 0.00011676125823966602, "loss": 0.2839, "step": 3724 }, { "epoch": 4.41, "learning_rate": 0.00011672403170480872, "loss": 0.2359, "step": 3725 }, { "epoch": 4.42, "learning_rate": 0.00011668680278546929, "loss": 0.2288, "step": 3726 }, { "epoch": 4.42, "learning_rate": 0.0001166495714869558, "loss": 0.2718, "step": 3727 }, { "epoch": 4.42, "learning_rate": 0.00011661233781457654, "loss": 0.1967, "step": 3728 }, { "epoch": 4.42, "learning_rate": 0.00011657510177364032, "loss": 0.2098, "step": 3729 }, { "epoch": 4.42, "learning_rate": 0.00011653786336945614, "loss": 0.2466, "step": 3730 }, { "epoch": 4.42, "learning_rate": 0.00011650062260733339, "loss": 0.2207, "step": 3731 }, { "epoch": 4.42, "learning_rate": 0.00011646337949258175, "loss": 0.2124, "step": 3732 }, { "epoch": 4.42, "learning_rate": 0.00011642613403051133, "loss": 0.213, "step": 3733 }, { "epoch": 4.42, "learning_rate": 0.00011638888622643249, "loss": 0.2276, "step": 3734 }, { "epoch": 4.43, "learning_rate": 0.000116351636085656, "loss": 0.2206, "step": 3735 }, { "epoch": 4.43, "learning_rate": 0.00011631438361349287, "loss": 0.2382, "step": 3736 }, { "epoch": 4.43, "learning_rate": 0.00011627712881525452, "loss": 0.2264, "step": 3737 }, { "epoch": 4.43, "learning_rate": 0.00011623987169625261, "loss": 0.2392, "step": 3738 }, { "epoch": 4.43, "learning_rate": 0.00011620261226179927, "loss": 0.2139, "step": 3739 }, { "epoch": 4.43, "learning_rate": 0.00011616535051720685, "loss": 0.2103, "step": 3740 }, { "epoch": 4.43, "learning_rate": 0.00011612808646778806, "loss": 0.211, "step": 3741 }, { "epoch": 4.43, "learning_rate": 0.00011609082011885592, "loss": 0.2227, "step": 3742 }, { "epoch": 4.44, "learning_rate": 0.00011605355147572387, "loss": 0.2459, "step": 3743 }, { "epoch": 4.44, "learning_rate": 0.00011601628054370553, "loss": 0.2312, "step": 3744 }, { "epoch": 4.44, "learning_rate": 0.00011597900732811496, "loss": 0.2244, "step": 3745 }, { "epoch": 4.44, "learning_rate": 0.00011594173183426647, "loss": 0.2168, "step": 3746 }, { "epoch": 4.44, "learning_rate": 0.00011590445406747479, "loss": 0.2711, "step": 3747 }, { "epoch": 4.44, "learning_rate": 0.00011586717403305487, "loss": 0.1865, "step": 3748 }, { "epoch": 4.44, "learning_rate": 0.00011582989173632206, "loss": 0.3104, "step": 3749 }, { "epoch": 4.44, "learning_rate": 0.00011579260718259197, "loss": 0.2245, "step": 3750 }, { "epoch": 4.45, "learning_rate": 0.00011575532037718057, "loss": 0.2316, "step": 3751 }, { "epoch": 4.45, "learning_rate": 0.00011571803132540418, "loss": 0.2328, "step": 3752 }, { "epoch": 4.45, "learning_rate": 0.00011568074003257938, "loss": 0.267, "step": 3753 }, { "epoch": 4.45, "learning_rate": 0.0001156434465040231, "loss": 0.2131, "step": 3754 }, { "epoch": 4.45, "learning_rate": 0.0001156061507450526, "loss": 0.1945, "step": 3755 }, { "epoch": 4.45, "learning_rate": 0.00011556885276098536, "loss": 0.2344, "step": 3756 }, { "epoch": 4.45, "learning_rate": 0.00011553155255713937, "loss": 0.2221, "step": 3757 }, { "epoch": 4.45, "learning_rate": 0.00011549425013883275, "loss": 0.2098, "step": 3758 }, { "epoch": 4.45, "learning_rate": 0.00011545694551138409, "loss": 0.2329, "step": 3759 }, { "epoch": 4.46, "learning_rate": 0.00011541963868011212, "loss": 0.2187, "step": 3760 }, { "epoch": 4.46, "learning_rate": 0.00011538232965033601, "loss": 0.1928, "step": 3761 }, { "epoch": 4.46, "learning_rate": 0.00011534501842737527, "loss": 0.2103, "step": 3762 }, { "epoch": 4.46, "eval_loss": 3.44382643699646, "eval_runtime": 283.899, "eval_samples_per_second": 0.726, "eval_steps_per_second": 0.726, "step": 3762 }, { "epoch": 4.46, "learning_rate": 0.00011530770501654959, "loss": 0.2563, "step": 3763 }, { "epoch": 4.46, "learning_rate": 0.00011527038942317911, "loss": 0.1922, "step": 3764 }, { "epoch": 4.46, "learning_rate": 0.00011523307165258419, "loss": 0.2246, "step": 3765 }, { "epoch": 4.46, "learning_rate": 0.00011519575171008552, "loss": 0.2243, "step": 3766 }, { "epoch": 4.46, "learning_rate": 0.00011515842960100411, "loss": 0.2481, "step": 3767 }, { "epoch": 4.47, "learning_rate": 0.00011512110533066132, "loss": 0.2135, "step": 3768 }, { "epoch": 4.47, "learning_rate": 0.00011508377890437874, "loss": 0.2019, "step": 3769 }, { "epoch": 4.47, "learning_rate": 0.00011504645032747832, "loss": 0.2537, "step": 3770 }, { "epoch": 4.47, "learning_rate": 0.00011500911960528229, "loss": 0.2131, "step": 3771 }, { "epoch": 4.47, "learning_rate": 0.00011497178674311317, "loss": 0.2421, "step": 3772 }, { "epoch": 4.47, "learning_rate": 0.00011493445174629386, "loss": 0.2012, "step": 3773 }, { "epoch": 4.47, "learning_rate": 0.00011489711462014751, "loss": 0.2144, "step": 3774 }, { "epoch": 4.47, "learning_rate": 0.00011485977536999757, "loss": 0.2411, "step": 3775 }, { "epoch": 4.48, "learning_rate": 0.00011482243400116779, "loss": 0.192, "step": 3776 }, { "epoch": 4.48, "learning_rate": 0.00011478509051898225, "loss": 0.2245, "step": 3777 }, { "epoch": 4.48, "learning_rate": 0.00011474774492876532, "loss": 0.241, "step": 3778 }, { "epoch": 4.48, "learning_rate": 0.00011471039723584162, "loss": 0.2172, "step": 3779 }, { "epoch": 4.48, "learning_rate": 0.00011467304744553618, "loss": 0.2308, "step": 3780 }, { "epoch": 4.48, "learning_rate": 0.00011463569556317424, "loss": 0.2523, "step": 3781 }, { "epoch": 4.48, "learning_rate": 0.00011459834159408137, "loss": 0.216, "step": 3782 }, { "epoch": 4.48, "learning_rate": 0.00011456098554358342, "loss": 0.2098, "step": 3783 }, { "epoch": 4.48, "learning_rate": 0.00011452362741700655, "loss": 0.2101, "step": 3784 }, { "epoch": 4.49, "learning_rate": 0.00011448626721967717, "loss": 0.3598, "step": 3785 }, { "epoch": 4.49, "learning_rate": 0.00011444890495692213, "loss": 0.2131, "step": 3786 }, { "epoch": 4.49, "learning_rate": 0.00011441154063406841, "loss": 0.3067, "step": 3787 }, { "epoch": 4.49, "learning_rate": 0.00011437417425644337, "loss": 0.2866, "step": 3788 }, { "epoch": 4.49, "learning_rate": 0.00011433680582937461, "loss": 0.2688, "step": 3789 }, { "epoch": 4.49, "learning_rate": 0.00011429943535819005, "loss": 0.2286, "step": 3790 }, { "epoch": 4.49, "learning_rate": 0.00011426206284821792, "loss": 0.215, "step": 3791 }, { "epoch": 4.49, "learning_rate": 0.00011422468830478679, "loss": 0.2293, "step": 3792 }, { "epoch": 4.5, "learning_rate": 0.00011418731173322532, "loss": 0.2614, "step": 3793 }, { "epoch": 4.5, "learning_rate": 0.00011414993313886272, "loss": 0.2223, "step": 3794 }, { "epoch": 4.5, "learning_rate": 0.00011411255252702829, "loss": 0.2415, "step": 3795 }, { "epoch": 4.5, "learning_rate": 0.00011407516990305169, "loss": 0.2429, "step": 3796 }, { "epoch": 4.5, "learning_rate": 0.0001140377852722629, "loss": 0.2862, "step": 3797 }, { "epoch": 4.5, "learning_rate": 0.00011400039863999214, "loss": 0.2399, "step": 3798 }, { "epoch": 4.5, "learning_rate": 0.00011396301001156992, "loss": 0.915, "step": 3799 }, { "epoch": 4.5, "learning_rate": 0.00011392561939232706, "loss": 0.2398, "step": 3800 }, { "epoch": 4.51, "learning_rate": 0.00011388822678759464, "loss": 0.2817, "step": 3801 }, { "epoch": 4.51, "learning_rate": 0.00011385083220270401, "loss": 0.2224, "step": 3802 }, { "epoch": 4.51, "learning_rate": 0.00011381343564298683, "loss": 0.2319, "step": 3803 }, { "epoch": 4.51, "learning_rate": 0.00011377603711377504, "loss": 0.2269, "step": 3804 }, { "epoch": 4.51, "learning_rate": 0.00011373863662040087, "loss": 0.2552, "step": 3805 }, { "epoch": 4.51, "learning_rate": 0.00011370123416819682, "loss": 0.2335, "step": 3806 }, { "epoch": 4.51, "learning_rate": 0.00011366382976249564, "loss": 0.2197, "step": 3807 }, { "epoch": 4.51, "learning_rate": 0.00011362642340863034, "loss": 0.2433, "step": 3808 }, { "epoch": 4.52, "learning_rate": 0.00011358901511193431, "loss": 0.2135, "step": 3809 }, { "epoch": 4.52, "learning_rate": 0.0001135516048777412, "loss": 0.2488, "step": 3810 }, { "epoch": 4.52, "learning_rate": 0.0001135141927113848, "loss": 0.2426, "step": 3811 }, { "epoch": 4.52, "learning_rate": 0.0001134767786181993, "loss": 0.247, "step": 3812 }, { "epoch": 4.52, "learning_rate": 0.00011343936260351913, "loss": 0.2235, "step": 3813 }, { "epoch": 4.52, "learning_rate": 0.00011340194467267901, "loss": 0.2109, "step": 3814 }, { "epoch": 4.52, "learning_rate": 0.00011336452483101394, "loss": 0.2545, "step": 3815 }, { "epoch": 4.52, "learning_rate": 0.00011332710308385914, "loss": 0.2104, "step": 3816 }, { "epoch": 4.52, "learning_rate": 0.00011328967943655016, "loss": 0.2089, "step": 3817 }, { "epoch": 4.53, "learning_rate": 0.00011325225389442277, "loss": 0.2658, "step": 3818 }, { "epoch": 4.53, "learning_rate": 0.00011321482646281301, "loss": 0.2736, "step": 3819 }, { "epoch": 4.53, "learning_rate": 0.00011317739714705731, "loss": 0.2562, "step": 3820 }, { "epoch": 4.53, "learning_rate": 0.00011313996595249219, "loss": 0.2223, "step": 3821 }, { "epoch": 4.53, "learning_rate": 0.00011310253288445456, "loss": 0.2212, "step": 3822 }, { "epoch": 4.53, "learning_rate": 0.00011306509794828153, "loss": 0.2217, "step": 3823 }, { "epoch": 4.53, "learning_rate": 0.00011302766114931054, "loss": 0.2321, "step": 3824 }, { "epoch": 4.53, "learning_rate": 0.00011299022249287922, "loss": 0.2423, "step": 3825 }, { "epoch": 4.54, "learning_rate": 0.00011295278198432557, "loss": 0.2651, "step": 3826 }, { "epoch": 4.54, "learning_rate": 0.0001129153396289877, "loss": 0.2256, "step": 3827 }, { "epoch": 4.54, "learning_rate": 0.00011287789543220417, "loss": 0.2656, "step": 3828 }, { "epoch": 4.54, "learning_rate": 0.00011284044939931364, "loss": 0.2332, "step": 3829 }, { "epoch": 4.54, "learning_rate": 0.0001128030015356551, "loss": 0.2121, "step": 3830 }, { "epoch": 4.54, "learning_rate": 0.00011276555184656783, "loss": 0.2148, "step": 3831 }, { "epoch": 4.54, "learning_rate": 0.00011272810033739135, "loss": 0.234, "step": 3832 }, { "epoch": 4.54, "learning_rate": 0.00011269064701346534, "loss": 0.2466, "step": 3833 }, { "epoch": 4.55, "learning_rate": 0.00011265319188012994, "loss": 0.2008, "step": 3834 }, { "epoch": 4.55, "learning_rate": 0.00011261573494272538, "loss": 0.1905, "step": 3835 }, { "epoch": 4.55, "learning_rate": 0.00011257827620659216, "loss": 0.2515, "step": 3836 }, { "epoch": 4.55, "learning_rate": 0.00011254081567707115, "loss": 0.2579, "step": 3837 }, { "epoch": 4.55, "learning_rate": 0.00011250335335950342, "loss": 0.2598, "step": 3838 }, { "epoch": 4.55, "learning_rate": 0.00011246588925923018, "loss": 0.2399, "step": 3839 }, { "epoch": 4.55, "learning_rate": 0.00011242842338159309, "loss": 0.2181, "step": 3840 }, { "epoch": 4.55, "learning_rate": 0.0001123909557319339, "loss": 0.2744, "step": 3841 }, { "epoch": 4.55, "learning_rate": 0.00011235348631559473, "loss": 0.2149, "step": 3842 }, { "epoch": 4.56, "learning_rate": 0.00011231601513791786, "loss": 0.2184, "step": 3843 }, { "epoch": 4.56, "learning_rate": 0.0001122785422042459, "loss": 0.2098, "step": 3844 }, { "epoch": 4.56, "learning_rate": 0.00011224106751992163, "loss": 0.2277, "step": 3845 }, { "epoch": 4.56, "learning_rate": 0.00011220359109028815, "loss": 0.2571, "step": 3846 }, { "epoch": 4.56, "learning_rate": 0.00011216611292068881, "loss": 0.2087, "step": 3847 }, { "epoch": 4.56, "learning_rate": 0.0001121286330164671, "loss": 0.2497, "step": 3848 }, { "epoch": 4.56, "learning_rate": 0.00011209115138296693, "loss": 0.1869, "step": 3849 }, { "epoch": 4.56, "learning_rate": 0.0001120536680255323, "loss": 0.239, "step": 3850 }, { "epoch": 4.57, "learning_rate": 0.00011201618294950756, "loss": 0.2018, "step": 3851 }, { "epoch": 4.57, "learning_rate": 0.00011197869616023722, "loss": 0.2751, "step": 3852 }, { "epoch": 4.57, "learning_rate": 0.00011194120766306611, "loss": 0.2526, "step": 3853 }, { "epoch": 4.57, "learning_rate": 0.00011190371746333923, "loss": 0.2657, "step": 3854 }, { "epoch": 4.57, "learning_rate": 0.00011186622556640194, "loss": 0.2659, "step": 3855 }, { "epoch": 4.57, "learning_rate": 0.00011182873197759971, "loss": 0.2401, "step": 3856 }, { "epoch": 4.57, "learning_rate": 0.00011179123670227833, "loss": 0.2299, "step": 3857 }, { "epoch": 4.57, "learning_rate": 0.00011175373974578378, "loss": 0.2249, "step": 3858 }, { "epoch": 4.58, "learning_rate": 0.00011171624111346232, "loss": 0.2457, "step": 3859 }, { "epoch": 4.58, "learning_rate": 0.00011167874081066045, "loss": 0.192, "step": 3860 }, { "epoch": 4.58, "learning_rate": 0.00011164123884272493, "loss": 0.2591, "step": 3861 }, { "epoch": 4.58, "learning_rate": 0.00011160373521500264, "loss": 0.2632, "step": 3862 }, { "epoch": 4.58, "learning_rate": 0.00011156622993284084, "loss": 0.248, "step": 3863 }, { "epoch": 4.58, "learning_rate": 0.00011152872300158694, "loss": 0.2071, "step": 3864 }, { "epoch": 4.58, "learning_rate": 0.00011149121442658861, "loss": 0.2935, "step": 3865 }, { "epoch": 4.58, "learning_rate": 0.00011145370421319377, "loss": 0.2191, "step": 3866 }, { "epoch": 4.58, "learning_rate": 0.00011141619236675056, "loss": 0.2737, "step": 3867 }, { "epoch": 4.59, "learning_rate": 0.00011137867889260734, "loss": 0.2281, "step": 3868 }, { "epoch": 4.59, "learning_rate": 0.00011134116379611273, "loss": 0.2083, "step": 3869 }, { "epoch": 4.59, "learning_rate": 0.00011130364708261552, "loss": 0.2079, "step": 3870 }, { "epoch": 4.59, "learning_rate": 0.00011126612875746479, "loss": 0.2423, "step": 3871 }, { "epoch": 4.59, "learning_rate": 0.00011122860882600986, "loss": 0.1903, "step": 3872 }, { "epoch": 4.59, "learning_rate": 0.00011119108729360026, "loss": 0.1995, "step": 3873 }, { "epoch": 4.59, "learning_rate": 0.0001111535641655857, "loss": 0.2479, "step": 3874 }, { "epoch": 4.59, "learning_rate": 0.00011111603944731623, "loss": 0.198, "step": 3875 }, { "epoch": 4.6, "learning_rate": 0.00011107851314414197, "loss": 0.2242, "step": 3876 }, { "epoch": 4.6, "learning_rate": 0.0001110409852614134, "loss": 0.29, "step": 3877 }, { "epoch": 4.6, "learning_rate": 0.00011100345580448118, "loss": 0.1931, "step": 3878 }, { "epoch": 4.6, "learning_rate": 0.00011096592477869616, "loss": 0.2195, "step": 3879 }, { "epoch": 4.6, "learning_rate": 0.0001109283921894095, "loss": 0.2383, "step": 3880 }, { "epoch": 4.6, "learning_rate": 0.00011089085804197248, "loss": 0.2729, "step": 3881 }, { "epoch": 4.6, "learning_rate": 0.00011085332234173664, "loss": 0.1836, "step": 3882 }, { "epoch": 4.6, "learning_rate": 0.00011081578509405382, "loss": 0.2724, "step": 3883 }, { "epoch": 4.61, "learning_rate": 0.00011077824630427594, "loss": 0.2027, "step": 3884 }, { "epoch": 4.61, "learning_rate": 0.00011074070597775527, "loss": 0.2681, "step": 3885 }, { "epoch": 4.61, "learning_rate": 0.00011070316411984421, "loss": 0.205, "step": 3886 }, { "epoch": 4.61, "learning_rate": 0.0001106656207358954, "loss": 0.3106, "step": 3887 }, { "epoch": 4.61, "learning_rate": 0.00011062807583126172, "loss": 0.2126, "step": 3888 }, { "epoch": 4.61, "learning_rate": 0.00011059052941129628, "loss": 0.4017, "step": 3889 }, { "epoch": 4.61, "learning_rate": 0.00011055298148135236, "loss": 0.2406, "step": 3890 }, { "epoch": 4.61, "learning_rate": 0.00011051543204678348, "loss": 0.2833, "step": 3891 }, { "epoch": 4.61, "learning_rate": 0.00011047788111294333, "loss": 0.2224, "step": 3892 }, { "epoch": 4.62, "learning_rate": 0.0001104403286851859, "loss": 0.3536, "step": 3893 }, { "epoch": 4.62, "learning_rate": 0.00011040277476886533, "loss": 0.2373, "step": 3894 }, { "epoch": 4.62, "learning_rate": 0.00011036521936933604, "loss": 0.2297, "step": 3895 }, { "epoch": 4.62, "learning_rate": 0.00011032766249195252, "loss": 0.1979, "step": 3896 }, { "epoch": 4.62, "learning_rate": 0.00011029010414206965, "loss": 0.2434, "step": 3897 }, { "epoch": 4.62, "learning_rate": 0.00011025254432504233, "loss": 0.2897, "step": 3898 }, { "epoch": 4.62, "learning_rate": 0.00011021498304622586, "loss": 0.2121, "step": 3899 }, { "epoch": 4.62, "learning_rate": 0.00011017742031097563, "loss": 0.3021, "step": 3900 }, { "epoch": 4.63, "learning_rate": 0.00011013985612464726, "loss": 0.2463, "step": 3901 }, { "epoch": 4.63, "learning_rate": 0.0001101022904925966, "loss": 0.3078, "step": 3902 }, { "epoch": 4.63, "learning_rate": 0.00011006472342017966, "loss": 0.3664, "step": 3903 }, { "epoch": 4.63, "learning_rate": 0.0001100271549127527, "loss": 0.2176, "step": 3904 }, { "epoch": 4.63, "learning_rate": 0.0001099895849756722, "loss": 0.2137, "step": 3905 }, { "epoch": 4.63, "learning_rate": 0.00010995201361429474, "loss": 0.2588, "step": 3906 }, { "epoch": 4.63, "learning_rate": 0.00010991444083397728, "loss": 0.2686, "step": 3907 }, { "epoch": 4.63, "learning_rate": 0.00010987686664007679, "loss": 0.2235, "step": 3908 }, { "epoch": 4.64, "learning_rate": 0.00010983929103795059, "loss": 0.2602, "step": 3909 }, { "epoch": 4.64, "learning_rate": 0.0001098017140329561, "loss": 0.1857, "step": 3910 }, { "epoch": 4.64, "learning_rate": 0.00010976413563045094, "loss": 0.2307, "step": 3911 }, { "epoch": 4.64, "learning_rate": 0.00010972655583579308, "loss": 0.2658, "step": 3912 }, { "epoch": 4.64, "learning_rate": 0.00010968897465434051, "loss": 0.2106, "step": 3913 }, { "epoch": 4.64, "learning_rate": 0.00010965139209145152, "loss": 0.2122, "step": 3914 }, { "epoch": 4.64, "learning_rate": 0.00010961380815248454, "loss": 0.2433, "step": 3915 }, { "epoch": 4.64, "learning_rate": 0.0001095762228427982, "loss": 0.2032, "step": 3916 }, { "epoch": 4.64, "learning_rate": 0.00010953863616775138, "loss": 0.3393, "step": 3917 }, { "epoch": 4.65, "learning_rate": 0.00010950104813270314, "loss": 0.2476, "step": 3918 }, { "epoch": 4.65, "learning_rate": 0.00010946345874301264, "loss": 0.1929, "step": 3919 }, { "epoch": 4.65, "learning_rate": 0.0001094258680040394, "loss": 0.2509, "step": 3920 }, { "epoch": 4.65, "learning_rate": 0.00010938827592114294, "loss": 0.2103, "step": 3921 }, { "epoch": 4.65, "learning_rate": 0.00010935068249968314, "loss": 0.2297, "step": 3922 }, { "epoch": 4.65, "learning_rate": 0.00010931308774501998, "loss": 0.2259, "step": 3923 }, { "epoch": 4.65, "learning_rate": 0.00010927549166251368, "loss": 0.238, "step": 3924 }, { "epoch": 4.65, "learning_rate": 0.00010923789425752456, "loss": 0.3147, "step": 3925 }, { "epoch": 4.66, "learning_rate": 0.00010920029553541326, "loss": 0.2753, "step": 3926 }, { "epoch": 4.66, "learning_rate": 0.00010916269550154048, "loss": 0.2399, "step": 3927 }, { "epoch": 4.66, "learning_rate": 0.0001091250941612672, "loss": 0.2196, "step": 3928 }, { "epoch": 4.66, "learning_rate": 0.00010908749151995452, "loss": 0.2326, "step": 3929 }, { "epoch": 4.66, "learning_rate": 0.0001090498875829638, "loss": 0.2217, "step": 3930 }, { "epoch": 4.66, "learning_rate": 0.00010901228235565651, "loss": 0.2012, "step": 3931 }, { "epoch": 4.66, "learning_rate": 0.00010897467584339434, "loss": 0.2018, "step": 3932 }, { "epoch": 4.66, "learning_rate": 0.00010893706805153915, "loss": 0.2382, "step": 3933 }, { "epoch": 4.67, "learning_rate": 0.000108899458985453, "loss": 0.2202, "step": 3934 }, { "epoch": 4.67, "learning_rate": 0.00010886184865049813, "loss": 0.2038, "step": 3935 }, { "epoch": 4.67, "learning_rate": 0.00010882423705203698, "loss": 0.2406, "step": 3936 }, { "epoch": 4.67, "learning_rate": 0.00010878662419543206, "loss": 0.2393, "step": 3937 }, { "epoch": 4.67, "learning_rate": 0.00010874901008604623, "loss": 0.2626, "step": 3938 }, { "epoch": 4.67, "learning_rate": 0.00010871139472924237, "loss": 0.246, "step": 3939 }, { "epoch": 4.67, "learning_rate": 0.00010867377813038366, "loss": 0.2228, "step": 3940 }, { "epoch": 4.67, "learning_rate": 0.00010863616029483339, "loss": 0.2091, "step": 3941 }, { "epoch": 4.67, "learning_rate": 0.00010859854122795508, "loss": 0.215, "step": 3942 }, { "epoch": 4.68, "learning_rate": 0.0001085609209351123, "loss": 0.2071, "step": 3943 }, { "epoch": 4.68, "learning_rate": 0.00010852329942166894, "loss": 0.2208, "step": 3944 }, { "epoch": 4.68, "learning_rate": 0.00010848567669298901, "loss": 0.1988, "step": 3945 }, { "epoch": 4.68, "learning_rate": 0.00010844805275443673, "loss": 0.2129, "step": 3946 }, { "epoch": 4.68, "learning_rate": 0.00010841042761137634, "loss": 0.3038, "step": 3947 }, { "epoch": 4.68, "learning_rate": 0.00010837280126917248, "loss": 0.206, "step": 3948 }, { "epoch": 4.68, "learning_rate": 0.00010833517373318975, "loss": 0.2648, "step": 3949 }, { "epoch": 4.68, "learning_rate": 0.00010829754500879308, "loss": 0.2136, "step": 3950 }, { "epoch": 4.69, "learning_rate": 0.0001082599151013475, "loss": 0.3746, "step": 3951 }, { "epoch": 4.69, "learning_rate": 0.00010822228401621819, "loss": 0.2403, "step": 3952 }, { "epoch": 4.69, "learning_rate": 0.00010818465175877052, "loss": 0.2288, "step": 3953 }, { "epoch": 4.69, "learning_rate": 0.0001081470183343701, "loss": 0.2099, "step": 3954 }, { "epoch": 4.69, "learning_rate": 0.00010810938374838251, "loss": 0.1992, "step": 3955 }, { "epoch": 4.69, "learning_rate": 0.0001080717480061737, "loss": 0.2337, "step": 3956 }, { "epoch": 4.69, "learning_rate": 0.00010803411111310971, "loss": 0.2127, "step": 3957 }, { "epoch": 4.69, "learning_rate": 0.00010799647307455674, "loss": 0.2936, "step": 3958 }, { "epoch": 4.7, "learning_rate": 0.00010795883389588111, "loss": 0.3019, "step": 3959 }, { "epoch": 4.7, "learning_rate": 0.00010792119358244939, "loss": 0.2262, "step": 3960 }, { "epoch": 4.7, "learning_rate": 0.00010788355213962825, "loss": 0.2561, "step": 3961 }, { "epoch": 4.7, "learning_rate": 0.0001078459095727845, "loss": 0.2992, "step": 3962 }, { "epoch": 4.7, "learning_rate": 0.0001078082658872852, "loss": 0.2081, "step": 3963 }, { "epoch": 4.7, "learning_rate": 0.00010777062108849756, "loss": 0.2089, "step": 3964 }, { "epoch": 4.7, "learning_rate": 0.00010773297518178881, "loss": 0.2112, "step": 3965 }, { "epoch": 4.7, "learning_rate": 0.00010769532817252653, "loss": 0.1898, "step": 3966 }, { "epoch": 4.7, "learning_rate": 0.00010765768006607826, "loss": 0.3229, "step": 3967 }, { "epoch": 4.71, "learning_rate": 0.00010762003086781185, "loss": 0.2241, "step": 3968 }, { "epoch": 4.71, "learning_rate": 0.00010758238058309527, "loss": 0.2814, "step": 3969 }, { "epoch": 4.71, "learning_rate": 0.00010754472921729661, "loss": 0.2403, "step": 3970 }, { "epoch": 4.71, "learning_rate": 0.00010750707677578413, "loss": 0.2715, "step": 3971 }, { "epoch": 4.71, "eval_loss": 3.3954412937164307, "eval_runtime": 283.9122, "eval_samples_per_second": 0.726, "eval_steps_per_second": 0.726, "step": 3971 }, { "epoch": 4.71, "learning_rate": 0.00010746942326392628, "loss": 0.2263, "step": 3972 }, { "epoch": 4.71, "learning_rate": 0.00010743176868709157, "loss": 0.2433, "step": 3973 }, { "epoch": 4.71, "learning_rate": 0.0001073941130506488, "loss": 0.2871, "step": 3974 }, { "epoch": 4.71, "learning_rate": 0.00010735645635996676, "loss": 0.2416, "step": 3975 }, { "epoch": 4.72, "learning_rate": 0.0001073187986204145, "loss": 0.2563, "step": 3976 }, { "epoch": 4.72, "learning_rate": 0.00010728113983736126, "loss": 0.2502, "step": 3977 }, { "epoch": 4.72, "learning_rate": 0.00010724348001617625, "loss": 0.2145, "step": 3978 }, { "epoch": 4.72, "learning_rate": 0.000107205819162229, "loss": 0.2639, "step": 3979 }, { "epoch": 4.72, "learning_rate": 0.00010716815728088912, "loss": 0.2279, "step": 3980 }, { "epoch": 4.72, "learning_rate": 0.0001071304943775264, "loss": 0.2086, "step": 3981 }, { "epoch": 4.72, "learning_rate": 0.00010709283045751069, "loss": 0.2142, "step": 3982 }, { "epoch": 4.72, "learning_rate": 0.0001070551655262121, "loss": 0.2381, "step": 3983 }, { "epoch": 4.73, "learning_rate": 0.00010701749958900078, "loss": 0.2313, "step": 3984 }, { "epoch": 4.73, "learning_rate": 0.0001069798326512471, "loss": 0.1954, "step": 3985 }, { "epoch": 4.73, "learning_rate": 0.00010694216471832152, "loss": 0.2253, "step": 3986 }, { "epoch": 4.73, "learning_rate": 0.00010690449579559469, "loss": 0.2104, "step": 3987 }, { "epoch": 4.73, "learning_rate": 0.00010686682588843737, "loss": 0.2172, "step": 3988 }, { "epoch": 4.73, "learning_rate": 0.00010682915500222051, "loss": 0.2094, "step": 3989 }, { "epoch": 4.73, "learning_rate": 0.00010679148314231504, "loss": 0.2885, "step": 3990 }, { "epoch": 4.73, "learning_rate": 0.00010675381031409225, "loss": 0.3085, "step": 3991 }, { "epoch": 4.73, "learning_rate": 0.00010671613652292343, "loss": 0.2515, "step": 3992 }, { "epoch": 4.74, "learning_rate": 0.00010667846177418003, "loss": 0.2314, "step": 3993 }, { "epoch": 4.74, "learning_rate": 0.00010664078607323367, "loss": 0.2473, "step": 3994 }, { "epoch": 4.74, "learning_rate": 0.00010660310942545608, "loss": 0.2283, "step": 3995 }, { "epoch": 4.74, "learning_rate": 0.00010656543183621912, "loss": 0.226, "step": 3996 }, { "epoch": 4.74, "learning_rate": 0.00010652775331089477, "loss": 0.2169, "step": 3997 }, { "epoch": 4.74, "learning_rate": 0.00010649007385485519, "loss": 0.2079, "step": 3998 }, { "epoch": 4.74, "learning_rate": 0.00010645239347347269, "loss": 0.2437, "step": 3999 }, { "epoch": 4.74, "learning_rate": 0.00010641471217211958, "loss": 0.2127, "step": 4000 }, { "epoch": 4.75, "learning_rate": 0.00010637702995616847, "loss": 0.2527, "step": 4001 }, { "epoch": 4.75, "learning_rate": 0.00010633934683099196, "loss": 0.2193, "step": 4002 }, { "epoch": 4.75, "learning_rate": 0.0001063016628019629, "loss": 0.2744, "step": 4003 }, { "epoch": 4.75, "learning_rate": 0.00010626397787445416, "loss": 0.2592, "step": 4004 }, { "epoch": 4.75, "learning_rate": 0.00010622629205383885, "loss": 0.2107, "step": 4005 }, { "epoch": 4.75, "learning_rate": 0.00010618860534549006, "loss": 0.1956, "step": 4006 }, { "epoch": 4.75, "learning_rate": 0.00010615091775478117, "loss": 0.2546, "step": 4007 }, { "epoch": 4.75, "learning_rate": 0.00010611322928708555, "loss": 0.2376, "step": 4008 }, { "epoch": 4.76, "learning_rate": 0.00010607553994777684, "loss": 0.2359, "step": 4009 }, { "epoch": 4.76, "learning_rate": 0.00010603784974222861, "loss": 0.2631, "step": 4010 }, { "epoch": 4.76, "learning_rate": 0.00010600015867581474, "loss": 0.2602, "step": 4011 }, { "epoch": 4.76, "learning_rate": 0.00010596246675390911, "loss": 0.2043, "step": 4012 }, { "epoch": 4.76, "learning_rate": 0.00010592477398188575, "loss": 0.2325, "step": 4013 }, { "epoch": 4.76, "learning_rate": 0.0001058870803651189, "loss": 0.2395, "step": 4014 }, { "epoch": 4.76, "learning_rate": 0.00010584938590898281, "loss": 0.2205, "step": 4015 }, { "epoch": 4.76, "learning_rate": 0.00010581169061885185, "loss": 0.2169, "step": 4016 }, { "epoch": 4.76, "learning_rate": 0.00010577399450010062, "loss": 0.1986, "step": 4017 }, { "epoch": 4.77, "learning_rate": 0.0001057362975581037, "loss": 0.2011, "step": 4018 }, { "epoch": 4.77, "learning_rate": 0.00010569859979823586, "loss": 0.2208, "step": 4019 }, { "epoch": 4.77, "learning_rate": 0.000105660901225872, "loss": 0.2478, "step": 4020 }, { "epoch": 4.77, "learning_rate": 0.00010562320184638714, "loss": 0.1936, "step": 4021 }, { "epoch": 4.77, "learning_rate": 0.00010558550166515633, "loss": 0.2719, "step": 4022 }, { "epoch": 4.77, "learning_rate": 0.00010554780068755483, "loss": 0.2873, "step": 4023 }, { "epoch": 4.77, "learning_rate": 0.00010551009891895796, "loss": 0.1993, "step": 4024 }, { "epoch": 4.77, "learning_rate": 0.00010547239636474115, "loss": 0.2174, "step": 4025 }, { "epoch": 4.78, "learning_rate": 0.00010543469303028002, "loss": 0.2009, "step": 4026 }, { "epoch": 4.78, "learning_rate": 0.00010539698892095021, "loss": 0.2038, "step": 4027 }, { "epoch": 4.78, "learning_rate": 0.0001053592840421275, "loss": 0.2119, "step": 4028 }, { "epoch": 4.78, "learning_rate": 0.00010532157839918779, "loss": 0.242, "step": 4029 }, { "epoch": 4.78, "learning_rate": 0.00010528387199750707, "loss": 0.2026, "step": 4030 }, { "epoch": 4.78, "learning_rate": 0.00010524616484246146, "loss": 0.2445, "step": 4031 }, { "epoch": 4.78, "learning_rate": 0.00010520845693942719, "loss": 0.2793, "step": 4032 }, { "epoch": 4.78, "learning_rate": 0.00010517074829378057, "loss": 0.2658, "step": 4033 }, { "epoch": 4.79, "learning_rate": 0.00010513303891089803, "loss": 0.2069, "step": 4034 }, { "epoch": 4.79, "learning_rate": 0.00010509532879615614, "loss": 0.2211, "step": 4035 }, { "epoch": 4.79, "learning_rate": 0.00010505761795493145, "loss": 0.2078, "step": 4036 }, { "epoch": 4.79, "learning_rate": 0.00010501990639260079, "loss": 0.2796, "step": 4037 }, { "epoch": 4.79, "learning_rate": 0.00010498219411454098, "loss": 0.2201, "step": 4038 }, { "epoch": 4.79, "learning_rate": 0.000104944481126129, "loss": 0.198, "step": 4039 }, { "epoch": 4.79, "learning_rate": 0.00010490676743274181, "loss": 0.2182, "step": 4040 }, { "epoch": 4.79, "learning_rate": 0.00010486905303975664, "loss": 0.216, "step": 4041 }, { "epoch": 4.79, "learning_rate": 0.00010483133795255071, "loss": 0.2365, "step": 4042 }, { "epoch": 4.8, "learning_rate": 0.00010479362217650137, "loss": 0.2472, "step": 4043 }, { "epoch": 4.8, "learning_rate": 0.0001047559057169861, "loss": 0.2259, "step": 4044 }, { "epoch": 4.8, "learning_rate": 0.00010471818857938238, "loss": 0.2306, "step": 4045 }, { "epoch": 4.8, "learning_rate": 0.00010468047076906793, "loss": 0.2689, "step": 4046 }, { "epoch": 4.8, "learning_rate": 0.0001046427522914204, "loss": 0.2361, "step": 4047 }, { "epoch": 4.8, "learning_rate": 0.00010460503315181768, "loss": 0.2919, "step": 4048 }, { "epoch": 4.8, "learning_rate": 0.00010456731335563769, "loss": 0.2397, "step": 4049 }, { "epoch": 4.8, "learning_rate": 0.00010452959290825846, "loss": 0.2144, "step": 4050 }, { "epoch": 4.81, "learning_rate": 0.00010449187181505804, "loss": 0.258, "step": 4051 }, { "epoch": 4.81, "learning_rate": 0.00010445415008141473, "loss": 0.2199, "step": 4052 }, { "epoch": 4.81, "learning_rate": 0.00010441642771270675, "loss": 0.1817, "step": 4053 }, { "epoch": 4.81, "learning_rate": 0.00010437870471431251, "loss": 0.2089, "step": 4054 }, { "epoch": 4.81, "learning_rate": 0.00010434098109161051, "loss": 0.2047, "step": 4055 }, { "epoch": 4.81, "learning_rate": 0.00010430325684997928, "loss": 0.2067, "step": 4056 }, { "epoch": 4.81, "learning_rate": 0.00010426553199479749, "loss": 0.1996, "step": 4057 }, { "epoch": 4.81, "learning_rate": 0.0001042278065314439, "loss": 0.2205, "step": 4058 }, { "epoch": 4.82, "learning_rate": 0.0001041900804652973, "loss": 0.2508, "step": 4059 }, { "epoch": 4.82, "learning_rate": 0.00010415235380173662, "loss": 0.2562, "step": 4060 }, { "epoch": 4.82, "learning_rate": 0.00010411462654614088, "loss": 0.2199, "step": 4061 }, { "epoch": 4.82, "learning_rate": 0.00010407689870388916, "loss": 0.2718, "step": 4062 }, { "epoch": 4.82, "learning_rate": 0.00010403917028036058, "loss": 0.2292, "step": 4063 }, { "epoch": 4.82, "learning_rate": 0.00010400144128093448, "loss": 0.3123, "step": 4064 }, { "epoch": 4.82, "learning_rate": 0.00010396371171099006, "loss": 0.2814, "step": 4065 }, { "epoch": 4.82, "learning_rate": 0.00010392598157590688, "loss": 0.231, "step": 4066 }, { "epoch": 4.82, "learning_rate": 0.00010388825088106433, "loss": 0.2242, "step": 4067 }, { "epoch": 4.83, "learning_rate": 0.00010385051963184202, "loss": 0.1998, "step": 4068 }, { "epoch": 4.83, "learning_rate": 0.0001038127878336196, "loss": 0.1902, "step": 4069 }, { "epoch": 4.83, "learning_rate": 0.00010377505549177682, "loss": 0.2198, "step": 4070 }, { "epoch": 4.83, "learning_rate": 0.00010373732261169346, "loss": 0.2537, "step": 4071 }, { "epoch": 4.83, "learning_rate": 0.00010369958919874943, "loss": 0.2267, "step": 4072 }, { "epoch": 4.83, "learning_rate": 0.00010366185525832467, "loss": 0.2376, "step": 4073 }, { "epoch": 4.83, "learning_rate": 0.00010362412079579924, "loss": 0.2076, "step": 4074 }, { "epoch": 4.83, "learning_rate": 0.00010358638581655322, "loss": 0.2507, "step": 4075 }, { "epoch": 4.84, "learning_rate": 0.00010354865032596682, "loss": 0.2077, "step": 4076 }, { "epoch": 4.84, "learning_rate": 0.00010351091432942029, "loss": 0.2762, "step": 4077 }, { "epoch": 4.84, "learning_rate": 0.00010347317783229398, "loss": 0.2232, "step": 4078 }, { "epoch": 4.84, "learning_rate": 0.00010343544083996824, "loss": 0.2475, "step": 4079 }, { "epoch": 4.84, "learning_rate": 0.00010339770335782359, "loss": 0.2108, "step": 4080 }, { "epoch": 4.84, "learning_rate": 0.00010335996539124055, "loss": 0.2544, "step": 4081 }, { "epoch": 4.84, "learning_rate": 0.00010332222694559975, "loss": 0.2253, "step": 4082 }, { "epoch": 4.84, "learning_rate": 0.00010328448802628183, "loss": 0.2324, "step": 4083 }, { "epoch": 4.85, "learning_rate": 0.00010324674863866759, "loss": 0.287, "step": 4084 }, { "epoch": 4.85, "learning_rate": 0.0001032090087881378, "loss": 0.3515, "step": 4085 }, { "epoch": 4.85, "learning_rate": 0.00010317126848007337, "loss": 0.2242, "step": 4086 }, { "epoch": 4.85, "learning_rate": 0.0001031335277198552, "loss": 0.2242, "step": 4087 }, { "epoch": 4.85, "learning_rate": 0.00010309578651286436, "loss": 0.1879, "step": 4088 }, { "epoch": 4.85, "learning_rate": 0.00010305804486448186, "loss": 0.2261, "step": 4089 }, { "epoch": 4.85, "learning_rate": 0.0001030203027800889, "loss": 0.2415, "step": 4090 }, { "epoch": 4.85, "learning_rate": 0.00010298256026506662, "loss": 0.2141, "step": 4091 }, { "epoch": 4.85, "learning_rate": 0.00010294481732479635, "loss": 0.2015, "step": 4092 }, { "epoch": 4.86, "learning_rate": 0.0001029070739646593, "loss": 0.206, "step": 4093 }, { "epoch": 4.86, "learning_rate": 0.00010286933019003697, "loss": 0.2598, "step": 4094 }, { "epoch": 4.86, "learning_rate": 0.00010283158600631072, "loss": 0.2561, "step": 4095 }, { "epoch": 4.86, "learning_rate": 0.00010279384141886208, "loss": 0.1914, "step": 4096 }, { "epoch": 4.86, "learning_rate": 0.00010275609643307258, "loss": 0.2416, "step": 4097 }, { "epoch": 4.86, "learning_rate": 0.00010271835105432388, "loss": 0.2012, "step": 4098 }, { "epoch": 4.86, "learning_rate": 0.00010268060528799754, "loss": 0.3043, "step": 4099 }, { "epoch": 4.86, "learning_rate": 0.00010264285913947545, "loss": 0.2331, "step": 4100 }, { "epoch": 4.87, "learning_rate": 0.00010260511261413923, "loss": 0.237, "step": 4101 }, { "epoch": 4.87, "learning_rate": 0.00010256736571737083, "loss": 0.2776, "step": 4102 }, { "epoch": 4.87, "learning_rate": 0.00010252961845455205, "loss": 0.1938, "step": 4103 }, { "epoch": 4.87, "learning_rate": 0.00010249187083106486, "loss": 0.2596, "step": 4104 }, { "epoch": 4.87, "learning_rate": 0.00010245412285229124, "loss": 0.196, "step": 4105 }, { "epoch": 4.87, "learning_rate": 0.00010241637452361323, "loss": 0.2369, "step": 4106 }, { "epoch": 4.87, "learning_rate": 0.00010237862585041293, "loss": 0.2091, "step": 4107 }, { "epoch": 4.87, "learning_rate": 0.00010234087683807247, "loss": 0.2273, "step": 4108 }, { "epoch": 4.88, "learning_rate": 0.00010230312749197406, "loss": 0.3996, "step": 4109 }, { "epoch": 4.88, "learning_rate": 0.00010226537781749987, "loss": 0.2382, "step": 4110 }, { "epoch": 4.88, "learning_rate": 0.00010222762782003223, "loss": 0.2174, "step": 4111 }, { "epoch": 4.88, "learning_rate": 0.00010218987750495343, "loss": 0.2569, "step": 4112 }, { "epoch": 4.88, "learning_rate": 0.00010215212687764593, "loss": 0.2239, "step": 4113 }, { "epoch": 4.88, "learning_rate": 0.00010211437594349203, "loss": 0.3192, "step": 4114 }, { "epoch": 4.88, "learning_rate": 0.00010207662470787427, "loss": 0.2347, "step": 4115 }, { "epoch": 4.88, "learning_rate": 0.00010203887317617511, "loss": 0.2461, "step": 4116 }, { "epoch": 4.88, "learning_rate": 0.00010200112135377709, "loss": 0.2826, "step": 4117 }, { "epoch": 4.89, "learning_rate": 0.00010196336924606283, "loss": 0.3531, "step": 4118 }, { "epoch": 4.89, "learning_rate": 0.00010192561685841496, "loss": 0.2104, "step": 4119 }, { "epoch": 4.89, "learning_rate": 0.00010188786419621612, "loss": 0.2257, "step": 4120 }, { "epoch": 4.89, "learning_rate": 0.00010185011126484903, "loss": 0.2096, "step": 4121 }, { "epoch": 4.89, "learning_rate": 0.0001018123580696964, "loss": 0.2009, "step": 4122 }, { "epoch": 4.89, "learning_rate": 0.00010177460461614108, "loss": 0.3198, "step": 4123 }, { "epoch": 4.89, "learning_rate": 0.00010173685090956582, "loss": 0.1979, "step": 4124 }, { "epoch": 4.89, "learning_rate": 0.00010169909695535354, "loss": 0.2507, "step": 4125 }, { "epoch": 4.9, "learning_rate": 0.00010166134275888708, "loss": 0.2295, "step": 4126 }, { "epoch": 4.9, "learning_rate": 0.00010162358832554937, "loss": 0.2355, "step": 4127 }, { "epoch": 4.9, "learning_rate": 0.00010158583366072338, "loss": 0.2253, "step": 4128 }, { "epoch": 4.9, "learning_rate": 0.00010154807876979213, "loss": 0.3306, "step": 4129 }, { "epoch": 4.9, "learning_rate": 0.00010151032365813859, "loss": 0.2265, "step": 4130 }, { "epoch": 4.9, "learning_rate": 0.00010147256833114586, "loss": 0.2176, "step": 4131 }, { "epoch": 4.9, "learning_rate": 0.000101434812794197, "loss": 0.2631, "step": 4132 }, { "epoch": 4.9, "learning_rate": 0.00010139705705267513, "loss": 0.2241, "step": 4133 }, { "epoch": 4.91, "learning_rate": 0.00010135930111196338, "loss": 0.2317, "step": 4134 }, { "epoch": 4.91, "learning_rate": 0.000101321544977445, "loss": 0.2325, "step": 4135 }, { "epoch": 4.91, "learning_rate": 0.00010128378865450307, "loss": 0.2011, "step": 4136 }, { "epoch": 4.91, "learning_rate": 0.00010124603214852093, "loss": 0.232, "step": 4137 }, { "epoch": 4.91, "learning_rate": 0.00010120827546488174, "loss": 0.2624, "step": 4138 }, { "epoch": 4.91, "learning_rate": 0.00010117051860896885, "loss": 0.2452, "step": 4139 }, { "epoch": 4.91, "learning_rate": 0.00010113276158616553, "loss": 0.2261, "step": 4140 }, { "epoch": 4.91, "learning_rate": 0.00010109500440185514, "loss": 0.2378, "step": 4141 }, { "epoch": 4.91, "learning_rate": 0.000101057247061421, "loss": 0.2172, "step": 4142 }, { "epoch": 4.92, "learning_rate": 0.00010101948957024647, "loss": 0.2539, "step": 4143 }, { "epoch": 4.92, "learning_rate": 0.00010098173193371499, "loss": 0.2178, "step": 4144 }, { "epoch": 4.92, "learning_rate": 0.00010094397415720991, "loss": 0.2545, "step": 4145 }, { "epoch": 4.92, "learning_rate": 0.00010090621624611474, "loss": 0.2233, "step": 4146 }, { "epoch": 4.92, "learning_rate": 0.0001008684582058129, "loss": 0.2547, "step": 4147 }, { "epoch": 4.92, "learning_rate": 0.00010083070004168786, "loss": 0.232, "step": 4148 }, { "epoch": 4.92, "learning_rate": 0.00010079294175912313, "loss": 0.313, "step": 4149 }, { "epoch": 4.92, "learning_rate": 0.00010075518336350218, "loss": 0.2234, "step": 4150 }, { "epoch": 4.93, "learning_rate": 0.00010071742486020854, "loss": 0.2447, "step": 4151 }, { "epoch": 4.93, "learning_rate": 0.00010067966625462577, "loss": 0.246, "step": 4152 }, { "epoch": 4.93, "learning_rate": 0.00010064190755213745, "loss": 0.1836, "step": 4153 }, { "epoch": 4.93, "learning_rate": 0.00010060414875812709, "loss": 0.2655, "step": 4154 }, { "epoch": 4.93, "learning_rate": 0.00010056638987797833, "loss": 0.2338, "step": 4155 }, { "epoch": 4.93, "learning_rate": 0.00010052863091707467, "loss": 0.2014, "step": 4156 }, { "epoch": 4.93, "learning_rate": 0.00010049087188079983, "loss": 0.2492, "step": 4157 }, { "epoch": 4.93, "learning_rate": 0.0001004531127745373, "loss": 0.2547, "step": 4158 }, { "epoch": 4.94, "learning_rate": 0.00010041535360367085, "loss": 0.2837, "step": 4159 }, { "epoch": 4.94, "learning_rate": 0.00010037759437358398, "loss": 0.2598, "step": 4160 }, { "epoch": 4.94, "learning_rate": 0.0001003398350896604, "loss": 0.2047, "step": 4161 }, { "epoch": 4.94, "learning_rate": 0.00010030207575728374, "loss": 0.2006, "step": 4162 }, { "epoch": 4.94, "learning_rate": 0.00010026431638183771, "loss": 0.2399, "step": 4163 }, { "epoch": 4.94, "learning_rate": 0.00010022655696870588, "loss": 0.2508, "step": 4164 }, { "epoch": 4.94, "learning_rate": 0.00010018879752327202, "loss": 0.2217, "step": 4165 }, { "epoch": 4.94, "learning_rate": 0.00010015103805091973, "loss": 0.2649, "step": 4166 }, { "epoch": 4.94, "learning_rate": 0.00010011327855703267, "loss": 0.2819, "step": 4167 }, { "epoch": 4.95, "learning_rate": 0.00010007551904699459, "loss": 0.2435, "step": 4168 }, { "epoch": 4.95, "learning_rate": 0.00010003775952618914, "loss": 0.2641, "step": 4169 }, { "epoch": 4.95, "learning_rate": 0.0001, "loss": 0.2157, "step": 4170 }, { "epoch": 4.95, "learning_rate": 9.996224047381087e-05, "loss": 0.2414, "step": 4171 }, { "epoch": 4.95, "learning_rate": 9.992448095300542e-05, "loss": 0.1886, "step": 4172 }, { "epoch": 4.95, "learning_rate": 9.988672144296735e-05, "loss": 0.2392, "step": 4173 }, { "epoch": 4.95, "learning_rate": 9.984896194908031e-05, "loss": 0.2231, "step": 4174 }, { "epoch": 4.95, "learning_rate": 9.981120247672801e-05, "loss": 0.2024, "step": 4175 }, { "epoch": 4.96, "learning_rate": 9.97734430312941e-05, "loss": 0.2104, "step": 4176 }, { "epoch": 4.96, "learning_rate": 9.973568361816233e-05, "loss": 0.2793, "step": 4177 }, { "epoch": 4.96, "learning_rate": 9.969792424271627e-05, "loss": 0.2292, "step": 4178 }, { "epoch": 4.96, "learning_rate": 9.966016491033962e-05, "loss": 0.2046, "step": 4179 }, { "epoch": 4.96, "learning_rate": 9.962240562641602e-05, "loss": 0.2099, "step": 4180 }, { "epoch": 4.96, "eval_loss": 3.49141788482666, "eval_runtime": 284.0307, "eval_samples_per_second": 0.725, "eval_steps_per_second": 0.725, "step": 4180 }, { "epoch": 4.96, "learning_rate": 9.95846463963292e-05, "loss": 0.2376, "step": 4181 }, { "epoch": 4.96, "learning_rate": 9.95468872254627e-05, "loss": 0.21, "step": 4182 }, { "epoch": 4.96, "learning_rate": 9.950912811920021e-05, "loss": 0.2079, "step": 4183 }, { "epoch": 4.97, "learning_rate": 9.947136908292534e-05, "loss": 0.2151, "step": 4184 }, { "epoch": 4.97, "learning_rate": 9.943361012202172e-05, "loss": 0.2247, "step": 4185 }, { "epoch": 4.97, "learning_rate": 9.939585124187292e-05, "loss": 0.2318, "step": 4186 }, { "epoch": 4.97, "learning_rate": 9.935809244786256e-05, "loss": 0.2414, "step": 4187 }, { "epoch": 4.97, "learning_rate": 9.932033374537422e-05, "loss": 0.1861, "step": 4188 }, { "epoch": 4.97, "learning_rate": 9.92825751397915e-05, "loss": 0.2475, "step": 4189 }, { "epoch": 4.97, "learning_rate": 9.924481663649785e-05, "loss": 0.2163, "step": 4190 }, { "epoch": 4.97, "learning_rate": 9.92070582408769e-05, "loss": 0.2044, "step": 4191 }, { "epoch": 4.97, "learning_rate": 9.916929995831215e-05, "loss": 0.2369, "step": 4192 }, { "epoch": 4.98, "learning_rate": 9.913154179418713e-05, "loss": 0.2553, "step": 4193 }, { "epoch": 4.98, "learning_rate": 9.909378375388529e-05, "loss": 0.3004, "step": 4194 }, { "epoch": 4.98, "learning_rate": 9.90560258427901e-05, "loss": 0.2313, "step": 4195 }, { "epoch": 4.98, "learning_rate": 9.901826806628505e-05, "loss": 0.2134, "step": 4196 }, { "epoch": 4.98, "learning_rate": 9.898051042975358e-05, "loss": 0.2175, "step": 4197 }, { "epoch": 4.98, "learning_rate": 9.894275293857904e-05, "loss": 0.1946, "step": 4198 }, { "epoch": 4.98, "learning_rate": 9.890499559814487e-05, "loss": 0.2395, "step": 4199 }, { "epoch": 4.98, "learning_rate": 9.886723841383448e-05, "loss": 0.215, "step": 4200 }, { "epoch": 4.99, "learning_rate": 9.882948139103118e-05, "loss": 0.2206, "step": 4201 }, { "epoch": 4.99, "learning_rate": 9.879172453511827e-05, "loss": 0.3013, "step": 4202 }, { "epoch": 4.99, "learning_rate": 9.875396785147909e-05, "loss": 0.2874, "step": 4203 }, { "epoch": 4.99, "learning_rate": 9.871621134549692e-05, "loss": 0.2445, "step": 4204 }, { "epoch": 4.99, "learning_rate": 9.867845502255506e-05, "loss": 0.2294, "step": 4205 }, { "epoch": 4.99, "learning_rate": 9.864069888803663e-05, "loss": 0.2251, "step": 4206 }, { "epoch": 4.99, "learning_rate": 9.860294294732489e-05, "loss": 0.2164, "step": 4207 }, { "epoch": 4.99, "learning_rate": 9.856518720580303e-05, "loss": 0.2441, "step": 4208 }, { "epoch": 5.0, "learning_rate": 9.852743166885417e-05, "loss": 0.1956, "step": 4209 }, { "epoch": 5.0, "learning_rate": 9.848967634186142e-05, "loss": 0.2312, "step": 4210 }, { "epoch": 5.0, "learning_rate": 9.845192123020789e-05, "loss": 0.2617, "step": 4211 }, { "epoch": 5.0, "learning_rate": 9.841416633927662e-05, "loss": 0.2207, "step": 4212 }, { "epoch": 5.0, "learning_rate": 9.837641167445065e-05, "loss": 0.1902, "step": 4213 }, { "epoch": 5.0, "learning_rate": 9.833865724111295e-05, "loss": 0.2028, "step": 4214 }, { "epoch": 5.0, "learning_rate": 9.830090304464647e-05, "loss": 0.2526, "step": 4215 }, { "epoch": 5.0, "learning_rate": 9.826314909043418e-05, "loss": 0.2181, "step": 4216 }, { "epoch": 5.0, "learning_rate": 9.822539538385897e-05, "loss": 0.2086, "step": 4217 }, { "epoch": 5.01, "learning_rate": 9.818764193030363e-05, "loss": 0.252, "step": 4218 }, { "epoch": 5.01, "learning_rate": 9.8149888735151e-05, "loss": 0.248, "step": 4219 }, { "epoch": 5.01, "learning_rate": 9.81121358037839e-05, "loss": 0.1881, "step": 4220 }, { "epoch": 5.01, "learning_rate": 9.807438314158508e-05, "loss": 0.244, "step": 4221 }, { "epoch": 5.01, "learning_rate": 9.803663075393718e-05, "loss": 0.2777, "step": 4222 }, { "epoch": 5.01, "learning_rate": 9.799887864622292e-05, "loss": 0.2263, "step": 4223 }, { "epoch": 5.01, "learning_rate": 9.796112682382493e-05, "loss": 0.2016, "step": 4224 }, { "epoch": 5.01, "learning_rate": 9.792337529212578e-05, "loss": 0.212, "step": 4225 }, { "epoch": 5.02, "learning_rate": 9.7885624056508e-05, "loss": 0.2941, "step": 4226 }, { "epoch": 5.02, "learning_rate": 9.784787312235411e-05, "loss": 0.2384, "step": 4227 }, { "epoch": 5.0, "learning_rate": 9.781012249504655e-05, "loss": 0.1963, "step": 4228 }, { "epoch": 5.0, "learning_rate": 9.777237217996779e-05, "loss": 0.1817, "step": 4229 }, { "epoch": 5.0, "learning_rate": 9.773462218250015e-05, "loss": 0.1794, "step": 4230 }, { "epoch": 5.0, "learning_rate": 9.769687250802597e-05, "loss": 0.1783, "step": 4231 }, { "epoch": 5.01, "learning_rate": 9.765912316192752e-05, "loss": 0.2024, "step": 4232 }, { "epoch": 5.01, "learning_rate": 9.76213741495871e-05, "loss": 0.1771, "step": 4233 }, { "epoch": 5.01, "learning_rate": 9.75836254763868e-05, "loss": 0.1834, "step": 4234 }, { "epoch": 5.01, "learning_rate": 9.754587714770878e-05, "loss": 0.1689, "step": 4235 }, { "epoch": 5.01, "learning_rate": 9.750812916893517e-05, "loss": 0.1893, "step": 4236 }, { "epoch": 5.01, "learning_rate": 9.747038154544795e-05, "loss": 0.1801, "step": 4237 }, { "epoch": 5.01, "learning_rate": 9.743263428262921e-05, "loss": 0.1973, "step": 4238 }, { "epoch": 5.01, "learning_rate": 9.739488738586078e-05, "loss": 0.1716, "step": 4239 }, { "epoch": 5.02, "learning_rate": 9.735714086052458e-05, "loss": 0.1803, "step": 4240 }, { "epoch": 5.02, "learning_rate": 9.731939471200244e-05, "loss": 0.199, "step": 4241 }, { "epoch": 5.02, "learning_rate": 9.728164894567617e-05, "loss": 0.1682, "step": 4242 }, { "epoch": 5.02, "learning_rate": 9.724390356692745e-05, "loss": 0.2256, "step": 4243 }, { "epoch": 5.02, "learning_rate": 9.720615858113794e-05, "loss": 0.1715, "step": 4244 }, { "epoch": 5.02, "learning_rate": 9.71684139936893e-05, "loss": 0.1864, "step": 4245 }, { "epoch": 5.02, "learning_rate": 9.713066980996308e-05, "loss": 0.1682, "step": 4246 }, { "epoch": 5.02, "learning_rate": 9.709292603534072e-05, "loss": 0.1824, "step": 4247 }, { "epoch": 5.03, "learning_rate": 9.705518267520368e-05, "loss": 0.1741, "step": 4248 }, { "epoch": 5.03, "learning_rate": 9.701743973493337e-05, "loss": 0.1716, "step": 4249 }, { "epoch": 5.03, "learning_rate": 9.697969721991114e-05, "loss": 0.1889, "step": 4250 }, { "epoch": 5.03, "learning_rate": 9.694195513551815e-05, "loss": 0.1823, "step": 4251 }, { "epoch": 5.03, "learning_rate": 9.690421348713568e-05, "loss": 0.186, "step": 4252 }, { "epoch": 5.03, "learning_rate": 9.686647228014482e-05, "loss": 0.1867, "step": 4253 }, { "epoch": 5.03, "learning_rate": 9.682873151992668e-05, "loss": 0.173, "step": 4254 }, { "epoch": 5.03, "learning_rate": 9.679099121186222e-05, "loss": 0.1878, "step": 4255 }, { "epoch": 5.03, "learning_rate": 9.675325136133244e-05, "loss": 0.1812, "step": 4256 }, { "epoch": 5.04, "learning_rate": 9.671551197371818e-05, "loss": 0.1883, "step": 4257 }, { "epoch": 5.04, "learning_rate": 9.667777305440029e-05, "loss": 0.1687, "step": 4258 }, { "epoch": 5.04, "learning_rate": 9.664003460875947e-05, "loss": 0.1788, "step": 4259 }, { "epoch": 5.04, "learning_rate": 9.660229664217642e-05, "loss": 0.1695, "step": 4260 }, { "epoch": 5.04, "learning_rate": 9.656455916003176e-05, "loss": 0.1779, "step": 4261 }, { "epoch": 5.04, "learning_rate": 9.652682216770607e-05, "loss": 0.1718, "step": 4262 }, { "epoch": 5.04, "learning_rate": 9.648908567057974e-05, "loss": 0.1728, "step": 4263 }, { "epoch": 5.04, "learning_rate": 9.645134967403319e-05, "loss": 0.2087, "step": 4264 }, { "epoch": 5.05, "learning_rate": 9.64136141834468e-05, "loss": 0.1742, "step": 4265 }, { "epoch": 5.05, "learning_rate": 9.63758792042008e-05, "loss": 0.1867, "step": 4266 }, { "epoch": 5.05, "learning_rate": 9.633814474167535e-05, "loss": 0.1854, "step": 4267 }, { "epoch": 5.05, "learning_rate": 9.630041080125058e-05, "loss": 0.184, "step": 4268 }, { "epoch": 5.05, "learning_rate": 9.626267738830655e-05, "loss": 0.1796, "step": 4269 }, { "epoch": 5.05, "learning_rate": 9.62249445082232e-05, "loss": 0.1899, "step": 4270 }, { "epoch": 5.05, "learning_rate": 9.61872121663804e-05, "loss": 0.1717, "step": 4271 }, { "epoch": 5.05, "learning_rate": 9.614948036815799e-05, "loss": 0.1754, "step": 4272 }, { "epoch": 5.06, "learning_rate": 9.611174911893568e-05, "loss": 0.1796, "step": 4273 }, { "epoch": 5.06, "learning_rate": 9.607401842409317e-05, "loss": 0.1737, "step": 4274 }, { "epoch": 5.06, "learning_rate": 9.603628828900996e-05, "loss": 0.1813, "step": 4275 }, { "epoch": 5.06, "learning_rate": 9.599855871906555e-05, "loss": 0.1819, "step": 4276 }, { "epoch": 5.06, "learning_rate": 9.59608297196394e-05, "loss": 0.1775, "step": 4277 }, { "epoch": 5.06, "learning_rate": 9.592310129611089e-05, "loss": 0.1786, "step": 4278 }, { "epoch": 5.06, "learning_rate": 9.588537345385914e-05, "loss": 0.1757, "step": 4279 }, { "epoch": 5.06, "learning_rate": 9.584764619826339e-05, "loss": 0.1768, "step": 4280 }, { "epoch": 5.06, "learning_rate": 9.580991953470271e-05, "loss": 0.1705, "step": 4281 }, { "epoch": 5.07, "learning_rate": 9.577219346855613e-05, "loss": 0.1822, "step": 4282 }, { "epoch": 5.07, "learning_rate": 9.573446800520253e-05, "loss": 0.1817, "step": 4283 }, { "epoch": 5.07, "learning_rate": 9.569674315002074e-05, "loss": 0.1814, "step": 4284 }, { "epoch": 5.07, "learning_rate": 9.56590189083895e-05, "loss": 0.1789, "step": 4285 }, { "epoch": 5.07, "learning_rate": 9.562129528568753e-05, "loss": 0.174, "step": 4286 }, { "epoch": 5.07, "learning_rate": 9.558357228729327e-05, "loss": 0.1769, "step": 4287 }, { "epoch": 5.07, "learning_rate": 9.554584991858528e-05, "loss": 0.1748, "step": 4288 }, { "epoch": 5.07, "learning_rate": 9.550812818494194e-05, "loss": 0.1752, "step": 4289 }, { "epoch": 5.08, "learning_rate": 9.547040709174159e-05, "loss": 0.171, "step": 4290 }, { "epoch": 5.08, "learning_rate": 9.543268664436233e-05, "loss": 0.1822, "step": 4291 }, { "epoch": 5.08, "learning_rate": 9.539496684818233e-05, "loss": 0.1746, "step": 4292 }, { "epoch": 5.08, "learning_rate": 9.535724770857962e-05, "loss": 0.1729, "step": 4293 }, { "epoch": 5.08, "learning_rate": 9.531952923093211e-05, "loss": 0.1888, "step": 4294 }, { "epoch": 5.08, "learning_rate": 9.528181142061763e-05, "loss": 0.1786, "step": 4295 }, { "epoch": 5.08, "learning_rate": 9.524409428301392e-05, "loss": 0.202, "step": 4296 }, { "epoch": 5.08, "learning_rate": 9.520637782349863e-05, "loss": 0.1846, "step": 4297 }, { "epoch": 5.09, "learning_rate": 9.516866204744931e-05, "loss": 0.1754, "step": 4298 }, { "epoch": 5.09, "learning_rate": 9.513094696024338e-05, "loss": 0.178, "step": 4299 }, { "epoch": 5.09, "learning_rate": 9.509323256725821e-05, "loss": 0.1744, "step": 4300 }, { "epoch": 5.09, "learning_rate": 9.505551887387102e-05, "loss": 0.1791, "step": 4301 }, { "epoch": 5.09, "learning_rate": 9.501780588545901e-05, "loss": 0.1831, "step": 4302 }, { "epoch": 5.09, "learning_rate": 9.498009360739925e-05, "loss": 0.1786, "step": 4303 }, { "epoch": 5.09, "learning_rate": 9.494238204506858e-05, "loss": 0.1861, "step": 4304 }, { "epoch": 5.09, "learning_rate": 9.490467120384389e-05, "loss": 0.1823, "step": 4305 }, { "epoch": 5.09, "learning_rate": 9.486696108910198e-05, "loss": 0.1811, "step": 4306 }, { "epoch": 5.1, "learning_rate": 9.482925170621946e-05, "loss": 0.183, "step": 4307 }, { "epoch": 5.1, "learning_rate": 9.479154306057284e-05, "loss": 0.1857, "step": 4308 }, { "epoch": 5.1, "learning_rate": 9.475383515753856e-05, "loss": 0.1701, "step": 4309 }, { "epoch": 5.1, "learning_rate": 9.471612800249296e-05, "loss": 0.181, "step": 4310 }, { "epoch": 5.1, "learning_rate": 9.467842160081225e-05, "loss": 0.1735, "step": 4311 }, { "epoch": 5.1, "learning_rate": 9.464071595787253e-05, "loss": 0.1959, "step": 4312 }, { "epoch": 5.1, "learning_rate": 9.46030110790498e-05, "loss": 0.1854, "step": 4313 }, { "epoch": 5.1, "learning_rate": 9.456530696971999e-05, "loss": 0.178, "step": 4314 }, { "epoch": 5.11, "learning_rate": 9.452760363525887e-05, "loss": 0.1821, "step": 4315 }, { "epoch": 5.11, "learning_rate": 9.448990108104208e-05, "loss": 0.1774, "step": 4316 }, { "epoch": 5.11, "learning_rate": 9.44521993124452e-05, "loss": 0.1773, "step": 4317 }, { "epoch": 5.11, "learning_rate": 9.441449833484368e-05, "loss": 0.1697, "step": 4318 }, { "epoch": 5.11, "learning_rate": 9.437679815361291e-05, "loss": 0.1658, "step": 4319 }, { "epoch": 5.11, "learning_rate": 9.433909877412802e-05, "loss": 0.1817, "step": 4320 }, { "epoch": 5.11, "learning_rate": 9.430140020176416e-05, "loss": 0.1695, "step": 4321 }, { "epoch": 5.11, "learning_rate": 9.426370244189632e-05, "loss": 0.1691, "step": 4322 }, { "epoch": 5.12, "learning_rate": 9.422600549989942e-05, "loss": 0.1717, "step": 4323 }, { "epoch": 5.12, "learning_rate": 9.418830938114816e-05, "loss": 0.1788, "step": 4324 }, { "epoch": 5.12, "learning_rate": 9.415061409101721e-05, "loss": 0.1773, "step": 4325 }, { "epoch": 5.12, "learning_rate": 9.411291963488109e-05, "loss": 0.1791, "step": 4326 }, { "epoch": 5.12, "learning_rate": 9.407522601811425e-05, "loss": 0.172, "step": 4327 }, { "epoch": 5.12, "learning_rate": 9.403753324609091e-05, "loss": 0.1713, "step": 4328 }, { "epoch": 5.12, "learning_rate": 9.399984132418528e-05, "loss": 0.1958, "step": 4329 }, { "epoch": 5.12, "learning_rate": 9.396215025777139e-05, "loss": 0.1701, "step": 4330 }, { "epoch": 5.12, "learning_rate": 9.392446005222321e-05, "loss": 0.1865, "step": 4331 }, { "epoch": 5.13, "learning_rate": 9.388677071291446e-05, "loss": 0.1773, "step": 4332 }, { "epoch": 5.13, "learning_rate": 9.384908224521886e-05, "loss": 0.168, "step": 4333 }, { "epoch": 5.13, "learning_rate": 9.381139465450993e-05, "loss": 0.1963, "step": 4334 }, { "epoch": 5.13, "learning_rate": 9.37737079461612e-05, "loss": 0.1697, "step": 4335 }, { "epoch": 5.13, "learning_rate": 9.373602212554586e-05, "loss": 0.1748, "step": 4336 }, { "epoch": 5.13, "learning_rate": 9.369833719803712e-05, "loss": 0.1669, "step": 4337 }, { "epoch": 5.13, "learning_rate": 9.366065316900805e-05, "loss": 0.1726, "step": 4338 }, { "epoch": 5.13, "learning_rate": 9.362297004383158e-05, "loss": 0.1778, "step": 4339 }, { "epoch": 5.14, "learning_rate": 9.358528782788045e-05, "loss": 0.1782, "step": 4340 }, { "epoch": 5.14, "learning_rate": 9.354760652652734e-05, "loss": 0.1864, "step": 4341 }, { "epoch": 5.14, "learning_rate": 9.35099261451448e-05, "loss": 0.1761, "step": 4342 }, { "epoch": 5.14, "learning_rate": 9.347224668910528e-05, "loss": 0.1882, "step": 4343 }, { "epoch": 5.14, "learning_rate": 9.343456816378092e-05, "loss": 0.1746, "step": 4344 }, { "epoch": 5.14, "learning_rate": 9.339689057454393e-05, "loss": 0.2131, "step": 4345 }, { "epoch": 5.14, "learning_rate": 9.335921392676631e-05, "loss": 0.1772, "step": 4346 }, { "epoch": 5.14, "learning_rate": 9.332153822582e-05, "loss": 0.1787, "step": 4347 }, { "epoch": 5.15, "learning_rate": 9.32838634770766e-05, "loss": 0.1713, "step": 4348 }, { "epoch": 5.15, "learning_rate": 9.324618968590776e-05, "loss": 0.1885, "step": 4349 }, { "epoch": 5.15, "learning_rate": 9.320851685768497e-05, "loss": 0.1783, "step": 4350 }, { "epoch": 5.15, "learning_rate": 9.317084499777956e-05, "loss": 0.1833, "step": 4351 }, { "epoch": 5.15, "learning_rate": 9.313317411156264e-05, "loss": 0.1849, "step": 4352 }, { "epoch": 5.15, "learning_rate": 9.309550420440532e-05, "loss": 0.1753, "step": 4353 }, { "epoch": 5.15, "learning_rate": 9.305783528167849e-05, "loss": 0.1873, "step": 4354 }, { "epoch": 5.15, "learning_rate": 9.302016734875292e-05, "loss": 0.1798, "step": 4355 }, { "epoch": 5.15, "learning_rate": 9.298250041099924e-05, "loss": 0.1735, "step": 4356 }, { "epoch": 5.16, "learning_rate": 9.294483447378792e-05, "loss": 0.1766, "step": 4357 }, { "epoch": 5.16, "learning_rate": 9.29071695424893e-05, "loss": 0.1683, "step": 4358 }, { "epoch": 5.16, "learning_rate": 9.286950562247365e-05, "loss": 0.1815, "step": 4359 }, { "epoch": 5.16, "learning_rate": 9.283184271911089e-05, "loss": 0.1736, "step": 4360 }, { "epoch": 5.16, "learning_rate": 9.279418083777103e-05, "loss": 0.198, "step": 4361 }, { "epoch": 5.16, "learning_rate": 9.275651998382377e-05, "loss": 0.1734, "step": 4362 }, { "epoch": 5.16, "learning_rate": 9.27188601626388e-05, "loss": 0.1974, "step": 4363 }, { "epoch": 5.16, "learning_rate": 9.268120137958551e-05, "loss": 0.1813, "step": 4364 }, { "epoch": 5.17, "learning_rate": 9.264354364003327e-05, "loss": 0.1823, "step": 4365 }, { "epoch": 5.17, "learning_rate": 9.260588694935124e-05, "loss": 0.1723, "step": 4366 }, { "epoch": 5.17, "learning_rate": 9.256823131290844e-05, "loss": 0.2007, "step": 4367 }, { "epoch": 5.17, "learning_rate": 9.253057673607376e-05, "loss": 0.1844, "step": 4368 }, { "epoch": 5.17, "learning_rate": 9.249292322421589e-05, "loss": 0.1773, "step": 4369 }, { "epoch": 5.17, "learning_rate": 9.245527078270341e-05, "loss": 0.1707, "step": 4370 }, { "epoch": 5.17, "learning_rate": 9.241761941690474e-05, "loss": 0.1769, "step": 4371 }, { "epoch": 5.17, "learning_rate": 9.237996913218819e-05, "loss": 0.1741, "step": 4372 }, { "epoch": 5.18, "learning_rate": 9.234231993392177e-05, "loss": 0.1807, "step": 4373 }, { "epoch": 5.18, "learning_rate": 9.230467182747351e-05, "loss": 0.1746, "step": 4374 }, { "epoch": 5.18, "learning_rate": 9.226702481821118e-05, "loss": 0.1794, "step": 4375 }, { "epoch": 5.18, "learning_rate": 9.222937891150249e-05, "loss": 0.1798, "step": 4376 }, { "epoch": 5.18, "learning_rate": 9.21917341127148e-05, "loss": 0.1682, "step": 4377 }, { "epoch": 5.18, "learning_rate": 9.215409042721552e-05, "loss": 0.196, "step": 4378 }, { "epoch": 5.18, "learning_rate": 9.211644786037179e-05, "loss": 0.1723, "step": 4379 }, { "epoch": 5.18, "learning_rate": 9.207880641755065e-05, "loss": 0.1698, "step": 4380 }, { "epoch": 5.18, "learning_rate": 9.204116610411893e-05, "loss": 0.1717, "step": 4381 }, { "epoch": 5.19, "learning_rate": 9.20035269254433e-05, "loss": 0.1823, "step": 4382 }, { "epoch": 5.19, "learning_rate": 9.19658888868903e-05, "loss": 0.1758, "step": 4383 }, { "epoch": 5.19, "learning_rate": 9.192825199382632e-05, "loss": 0.1773, "step": 4384 }, { "epoch": 5.19, "learning_rate": 9.189061625161751e-05, "loss": 0.1777, "step": 4385 }, { "epoch": 5.19, "learning_rate": 9.185298166562994e-05, "loss": 0.1751, "step": 4386 }, { "epoch": 5.19, "learning_rate": 9.181534824122947e-05, "loss": 0.1813, "step": 4387 }, { "epoch": 5.19, "learning_rate": 9.177771598378185e-05, "loss": 0.1863, "step": 4388 }, { "epoch": 5.19, "learning_rate": 9.174008489865253e-05, "loss": 0.1933, "step": 4389 }, { "epoch": 5.19, "eval_loss": 3.8528122901916504, "eval_runtime": 284.0506, "eval_samples_per_second": 0.725, "eval_steps_per_second": 0.725, "step": 4389 }, { "epoch": 5.2, "learning_rate": 9.170245499120693e-05, "loss": 0.1716, "step": 4390 }, { "epoch": 5.2, "learning_rate": 9.166482626681024e-05, "loss": 0.1875, "step": 4391 }, { "epoch": 5.2, "learning_rate": 9.162719873082757e-05, "loss": 0.1881, "step": 4392 }, { "epoch": 5.2, "learning_rate": 9.158957238862367e-05, "loss": 0.1838, "step": 4393 }, { "epoch": 5.2, "learning_rate": 9.155194724556331e-05, "loss": 0.1724, "step": 4394 }, { "epoch": 5.2, "learning_rate": 9.151432330701097e-05, "loss": 0.1859, "step": 4395 }, { "epoch": 5.2, "learning_rate": 9.147670057833107e-05, "loss": 0.1739, "step": 4396 }, { "epoch": 5.2, "learning_rate": 9.143907906488772e-05, "loss": 0.1885, "step": 4397 }, { "epoch": 5.21, "learning_rate": 9.140145877204496e-05, "loss": 0.1828, "step": 4398 }, { "epoch": 5.21, "learning_rate": 9.13638397051666e-05, "loss": 0.1937, "step": 4399 }, { "epoch": 5.21, "learning_rate": 9.132622186961637e-05, "loss": 0.1731, "step": 4400 }, { "epoch": 5.21, "learning_rate": 9.128860527075767e-05, "loss": 0.1721, "step": 4401 }, { "epoch": 5.21, "learning_rate": 9.125098991395378e-05, "loss": 0.1724, "step": 4402 }, { "epoch": 5.21, "learning_rate": 9.121337580456793e-05, "loss": 0.18, "step": 4403 }, { "epoch": 5.21, "learning_rate": 9.117576294796307e-05, "loss": 0.1731, "step": 4404 }, { "epoch": 5.21, "learning_rate": 9.11381513495019e-05, "loss": 0.1728, "step": 4405 }, { "epoch": 5.21, "learning_rate": 9.110054101454701e-05, "loss": 0.1787, "step": 4406 }, { "epoch": 5.22, "learning_rate": 9.106293194846087e-05, "loss": 0.1748, "step": 4407 }, { "epoch": 5.22, "learning_rate": 9.102532415660571e-05, "loss": 0.177, "step": 4408 }, { "epoch": 5.22, "learning_rate": 9.098771764434353e-05, "loss": 0.1896, "step": 4409 }, { "epoch": 5.22, "learning_rate": 9.095011241703623e-05, "loss": 0.1702, "step": 4410 }, { "epoch": 5.22, "learning_rate": 9.091250848004549e-05, "loss": 0.18, "step": 4411 }, { "epoch": 5.22, "learning_rate": 9.087490583873284e-05, "loss": 0.1788, "step": 4412 }, { "epoch": 5.22, "learning_rate": 9.083730449845954e-05, "loss": 0.1782, "step": 4413 }, { "epoch": 5.22, "learning_rate": 9.079970446458677e-05, "loss": 0.1799, "step": 4414 }, { "epoch": 5.23, "learning_rate": 9.076210574247543e-05, "loss": 0.1827, "step": 4415 }, { "epoch": 5.23, "learning_rate": 9.072450833748637e-05, "loss": 0.1981, "step": 4416 }, { "epoch": 5.23, "learning_rate": 9.068691225498004e-05, "loss": 0.1687, "step": 4417 }, { "epoch": 5.23, "learning_rate": 9.064931750031688e-05, "loss": 0.1759, "step": 4418 }, { "epoch": 5.23, "learning_rate": 9.061172407885707e-05, "loss": 0.1934, "step": 4419 }, { "epoch": 5.23, "learning_rate": 9.057413199596065e-05, "loss": 0.1791, "step": 4420 }, { "epoch": 5.23, "learning_rate": 9.053654125698738e-05, "loss": 0.1718, "step": 4421 }, { "epoch": 5.23, "learning_rate": 9.049895186729688e-05, "loss": 0.178, "step": 4422 }, { "epoch": 5.24, "learning_rate": 9.046136383224862e-05, "loss": 0.1848, "step": 4423 }, { "epoch": 5.24, "learning_rate": 9.042377715720182e-05, "loss": 0.1726, "step": 4424 }, { "epoch": 5.24, "learning_rate": 9.038619184751549e-05, "loss": 0.1924, "step": 4425 }, { "epoch": 5.24, "learning_rate": 9.034860790854849e-05, "loss": 0.1786, "step": 4426 }, { "epoch": 5.24, "learning_rate": 9.031102534565949e-05, "loss": 0.1899, "step": 4427 }, { "epoch": 5.24, "learning_rate": 9.027344416420695e-05, "loss": 0.1987, "step": 4428 }, { "epoch": 5.24, "learning_rate": 9.023586436954909e-05, "loss": 0.2179, "step": 4429 }, { "epoch": 5.24, "learning_rate": 9.019828596704394e-05, "loss": 0.1737, "step": 4430 }, { "epoch": 5.24, "learning_rate": 9.016070896204943e-05, "loss": 0.1978, "step": 4431 }, { "epoch": 5.25, "learning_rate": 9.01231333599232e-05, "loss": 0.1794, "step": 4432 }, { "epoch": 5.25, "learning_rate": 9.008555916602276e-05, "loss": 0.1758, "step": 4433 }, { "epoch": 5.25, "learning_rate": 9.004798638570527e-05, "loss": 0.1792, "step": 4434 }, { "epoch": 5.25, "learning_rate": 9.001041502432783e-05, "loss": 0.1782, "step": 4435 }, { "epoch": 5.25, "learning_rate": 8.99728450872473e-05, "loss": 0.186, "step": 4436 }, { "epoch": 5.25, "learning_rate": 8.993527657982036e-05, "loss": 0.1911, "step": 4437 }, { "epoch": 5.25, "learning_rate": 8.989770950740344e-05, "loss": 0.1758, "step": 4438 }, { "epoch": 5.25, "learning_rate": 8.986014387535275e-05, "loss": 0.1783, "step": 4439 }, { "epoch": 5.26, "learning_rate": 8.982257968902438e-05, "loss": 0.1789, "step": 4440 }, { "epoch": 5.26, "learning_rate": 8.978501695377415e-05, "loss": 0.1784, "step": 4441 }, { "epoch": 5.26, "learning_rate": 8.974745567495768e-05, "loss": 0.184, "step": 4442 }, { "epoch": 5.26, "learning_rate": 8.970989585793039e-05, "loss": 0.1774, "step": 4443 }, { "epoch": 5.26, "learning_rate": 8.967233750804747e-05, "loss": 0.1653, "step": 4444 }, { "epoch": 5.26, "learning_rate": 8.963478063066402e-05, "loss": 0.1782, "step": 4445 }, { "epoch": 5.26, "learning_rate": 8.959722523113469e-05, "loss": 0.1687, "step": 4446 }, { "epoch": 5.26, "learning_rate": 8.955967131481412e-05, "loss": 0.1822, "step": 4447 }, { "epoch": 5.27, "learning_rate": 8.952211888705668e-05, "loss": 0.1862, "step": 4448 }, { "epoch": 5.27, "learning_rate": 8.948456795321657e-05, "loss": 0.1959, "step": 4449 }, { "epoch": 5.27, "learning_rate": 8.944701851864767e-05, "loss": 0.1898, "step": 4450 }, { "epoch": 5.27, "learning_rate": 8.940947058870373e-05, "loss": 0.1803, "step": 4451 }, { "epoch": 5.27, "learning_rate": 8.937192416873828e-05, "loss": 0.1812, "step": 4452 }, { "epoch": 5.27, "learning_rate": 8.933437926410463e-05, "loss": 0.1868, "step": 4453 }, { "epoch": 5.27, "learning_rate": 8.929683588015582e-05, "loss": 0.1829, "step": 4454 }, { "epoch": 5.27, "learning_rate": 8.925929402224475e-05, "loss": 0.1803, "step": 4455 }, { "epoch": 5.27, "learning_rate": 8.922175369572407e-05, "loss": 0.1763, "step": 4456 }, { "epoch": 5.28, "learning_rate": 8.918421490594623e-05, "loss": 0.1831, "step": 4457 }, { "epoch": 5.28, "learning_rate": 8.914667765826338e-05, "loss": 0.1721, "step": 4458 }, { "epoch": 5.28, "learning_rate": 8.910914195802754e-05, "loss": 0.1716, "step": 4459 }, { "epoch": 5.28, "learning_rate": 8.907160781059052e-05, "loss": 0.1785, "step": 4460 }, { "epoch": 5.28, "learning_rate": 8.903407522130386e-05, "loss": 0.1745, "step": 4461 }, { "epoch": 5.28, "learning_rate": 8.899654419551886e-05, "loss": 0.1902, "step": 4462 }, { "epoch": 5.28, "learning_rate": 8.895901473858663e-05, "loss": 0.176, "step": 4463 }, { "epoch": 5.28, "learning_rate": 8.892148685585805e-05, "loss": 0.18, "step": 4464 }, { "epoch": 5.29, "learning_rate": 8.88839605526838e-05, "loss": 0.1802, "step": 4465 }, { "epoch": 5.29, "learning_rate": 8.88464358344143e-05, "loss": 0.1953, "step": 4466 }, { "epoch": 5.29, "learning_rate": 8.880891270639975e-05, "loss": 0.1848, "step": 4467 }, { "epoch": 5.29, "learning_rate": 8.877139117399014e-05, "loss": 0.2009, "step": 4468 }, { "epoch": 5.29, "learning_rate": 8.873387124253525e-05, "loss": 0.1784, "step": 4469 }, { "epoch": 5.29, "learning_rate": 8.869635291738452e-05, "loss": 0.1758, "step": 4470 }, { "epoch": 5.29, "learning_rate": 8.86588362038873e-05, "loss": 0.1724, "step": 4471 }, { "epoch": 5.29, "learning_rate": 8.862132110739266e-05, "loss": 0.1668, "step": 4472 }, { "epoch": 5.3, "learning_rate": 8.858380763324948e-05, "loss": 0.1696, "step": 4473 }, { "epoch": 5.3, "learning_rate": 8.854629578680624e-05, "loss": 0.1691, "step": 4474 }, { "epoch": 5.3, "learning_rate": 8.85087855734114e-05, "loss": 0.1742, "step": 4475 }, { "epoch": 5.3, "learning_rate": 8.847127699841307e-05, "loss": 0.1727, "step": 4476 }, { "epoch": 5.3, "learning_rate": 8.84337700671592e-05, "loss": 0.1749, "step": 4477 }, { "epoch": 5.3, "learning_rate": 8.839626478499738e-05, "loss": 0.1822, "step": 4478 }, { "epoch": 5.3, "learning_rate": 8.835876115727509e-05, "loss": 0.1784, "step": 4479 }, { "epoch": 5.3, "learning_rate": 8.832125918933954e-05, "loss": 0.1907, "step": 4480 }, { "epoch": 5.3, "learning_rate": 8.82837588865377e-05, "loss": 0.1825, "step": 4481 }, { "epoch": 5.31, "learning_rate": 8.824626025421626e-05, "loss": 0.1856, "step": 4482 }, { "epoch": 5.31, "learning_rate": 8.82087632977217e-05, "loss": 0.1809, "step": 4483 }, { "epoch": 5.31, "learning_rate": 8.81712680224003e-05, "loss": 0.1737, "step": 4484 }, { "epoch": 5.31, "learning_rate": 8.81337744335981e-05, "loss": 0.186, "step": 4485 }, { "epoch": 5.31, "learning_rate": 8.809628253666079e-05, "loss": 0.1741, "step": 4486 }, { "epoch": 5.31, "learning_rate": 8.805879233693393e-05, "loss": 0.1857, "step": 4487 }, { "epoch": 5.31, "learning_rate": 8.802130383976279e-05, "loss": 0.1741, "step": 4488 }, { "epoch": 5.31, "learning_rate": 8.798381705049248e-05, "loss": 0.1881, "step": 4489 }, { "epoch": 5.32, "learning_rate": 8.79463319744677e-05, "loss": 0.1785, "step": 4490 }, { "epoch": 5.32, "learning_rate": 8.790884861703308e-05, "loss": 0.1839, "step": 4491 }, { "epoch": 5.32, "learning_rate": 8.787136698353289e-05, "loss": 0.1728, "step": 4492 }, { "epoch": 5.32, "learning_rate": 8.783388707931122e-05, "loss": 0.1799, "step": 4493 }, { "epoch": 5.32, "learning_rate": 8.779640890971186e-05, "loss": 0.1759, "step": 4494 }, { "epoch": 5.32, "learning_rate": 8.775893248007839e-05, "loss": 0.1855, "step": 4495 }, { "epoch": 5.32, "learning_rate": 8.772145779575413e-05, "loss": 0.1856, "step": 4496 }, { "epoch": 5.32, "learning_rate": 8.768398486208215e-05, "loss": 0.1792, "step": 4497 }, { "epoch": 5.33, "learning_rate": 8.764651368440531e-05, "loss": 0.1816, "step": 4498 }, { "epoch": 5.33, "learning_rate": 8.760904426806612e-05, "loss": 0.183, "step": 4499 }, { "epoch": 5.33, "learning_rate": 8.757157661840693e-05, "loss": 0.1811, "step": 4500 }, { "epoch": 5.33, "learning_rate": 8.753411074076982e-05, "loss": 0.1745, "step": 4501 }, { "epoch": 5.33, "learning_rate": 8.749664664049663e-05, "loss": 0.1768, "step": 4502 }, { "epoch": 5.33, "learning_rate": 8.745918432292887e-05, "loss": 0.1762, "step": 4503 }, { "epoch": 5.33, "learning_rate": 8.742172379340785e-05, "loss": 0.1864, "step": 4504 }, { "epoch": 5.33, "learning_rate": 8.738426505727466e-05, "loss": 0.1759, "step": 4505 }, { "epoch": 5.33, "learning_rate": 8.73468081198701e-05, "loss": 0.2045, "step": 4506 }, { "epoch": 5.34, "learning_rate": 8.730935298653467e-05, "loss": 0.1714, "step": 4507 }, { "epoch": 5.34, "learning_rate": 8.727189966260869e-05, "loss": 0.1943, "step": 4508 }, { "epoch": 5.34, "learning_rate": 8.723444815343217e-05, "loss": 0.1724, "step": 4509 }, { "epoch": 5.34, "learning_rate": 8.719699846434492e-05, "loss": 0.1679, "step": 4510 }, { "epoch": 5.34, "learning_rate": 8.715955060068638e-05, "loss": 0.1962, "step": 4511 }, { "epoch": 5.34, "learning_rate": 8.712210456779584e-05, "loss": 0.1766, "step": 4512 }, { "epoch": 5.34, "learning_rate": 8.708466037101229e-05, "loss": 0.1864, "step": 4513 }, { "epoch": 5.34, "learning_rate": 8.704721801567448e-05, "loss": 0.1746, "step": 4514 }, { "epoch": 5.35, "learning_rate": 8.70097775071208e-05, "loss": 0.1707, "step": 4515 }, { "epoch": 5.35, "learning_rate": 8.69723388506895e-05, "loss": 0.1717, "step": 4516 }, { "epoch": 5.35, "learning_rate": 8.693490205171846e-05, "loss": 0.183, "step": 4517 }, { "epoch": 5.35, "learning_rate": 8.689746711554548e-05, "loss": 0.1812, "step": 4518 }, { "epoch": 5.35, "learning_rate": 8.686003404750785e-05, "loss": 0.1894, "step": 4519 }, { "epoch": 5.35, "learning_rate": 8.682260285294271e-05, "loss": 0.1726, "step": 4520 }, { "epoch": 5.35, "learning_rate": 8.678517353718698e-05, "loss": 0.1823, "step": 4521 }, { "epoch": 5.35, "learning_rate": 8.674774610557728e-05, "loss": 0.1754, "step": 4522 }, { "epoch": 5.36, "learning_rate": 8.671032056344988e-05, "loss": 0.185, "step": 4523 }, { "epoch": 5.36, "learning_rate": 8.667289691614087e-05, "loss": 0.1828, "step": 4524 }, { "epoch": 5.36, "learning_rate": 8.663547516898607e-05, "loss": 0.2032, "step": 4525 }, { "epoch": 5.36, "learning_rate": 8.659805532732103e-05, "loss": 0.1707, "step": 4526 }, { "epoch": 5.36, "learning_rate": 8.656063739648088e-05, "loss": 0.1816, "step": 4527 }, { "epoch": 5.36, "learning_rate": 8.652322138180072e-05, "loss": 0.1761, "step": 4528 }, { "epoch": 5.36, "learning_rate": 8.648580728861521e-05, "loss": 0.1816, "step": 4529 }, { "epoch": 5.36, "learning_rate": 8.644839512225886e-05, "loss": 0.1782, "step": 4530 }, { "epoch": 5.36, "learning_rate": 8.64109848880657e-05, "loss": 0.1844, "step": 4531 }, { "epoch": 5.37, "learning_rate": 8.637357659136967e-05, "loss": 0.1915, "step": 4532 }, { "epoch": 5.37, "learning_rate": 8.63361702375044e-05, "loss": 0.1759, "step": 4533 }, { "epoch": 5.37, "learning_rate": 8.629876583180321e-05, "loss": 0.1769, "step": 4534 }, { "epoch": 5.37, "learning_rate": 8.626136337959914e-05, "loss": 0.1888, "step": 4535 }, { "epoch": 5.37, "learning_rate": 8.622396288622497e-05, "loss": 0.1784, "step": 4536 }, { "epoch": 5.37, "learning_rate": 8.618656435701318e-05, "loss": 0.1936, "step": 4537 }, { "epoch": 5.37, "learning_rate": 8.614916779729603e-05, "loss": 0.1701, "step": 4538 }, { "epoch": 5.37, "learning_rate": 8.611177321240539e-05, "loss": 0.1861, "step": 4539 }, { "epoch": 5.38, "learning_rate": 8.607438060767296e-05, "loss": 0.1742, "step": 4540 }, { "epoch": 5.38, "learning_rate": 8.603698998843009e-05, "loss": 0.1939, "step": 4541 }, { "epoch": 5.38, "learning_rate": 8.59996013600079e-05, "loss": 0.1835, "step": 4542 }, { "epoch": 5.38, "learning_rate": 8.596221472773714e-05, "loss": 0.1904, "step": 4543 }, { "epoch": 5.38, "learning_rate": 8.592483009694834e-05, "loss": 0.1766, "step": 4544 }, { "epoch": 5.38, "learning_rate": 8.588744747297173e-05, "loss": 0.1765, "step": 4545 }, { "epoch": 5.38, "learning_rate": 8.585006686113733e-05, "loss": 0.1674, "step": 4546 }, { "epoch": 5.38, "learning_rate": 8.58126882667747e-05, "loss": 0.1759, "step": 4547 }, { "epoch": 5.39, "learning_rate": 8.577531169521324e-05, "loss": 0.1673, "step": 4548 }, { "epoch": 5.39, "learning_rate": 8.573793715178206e-05, "loss": 0.1788, "step": 4549 }, { "epoch": 5.39, "learning_rate": 8.570056464180998e-05, "loss": 0.1751, "step": 4550 }, { "epoch": 5.39, "learning_rate": 8.566319417062543e-05, "loss": 0.1804, "step": 4551 }, { "epoch": 5.39, "learning_rate": 8.562582574355666e-05, "loss": 0.1896, "step": 4552 }, { "epoch": 5.39, "learning_rate": 8.55884593659316e-05, "loss": 0.1899, "step": 4553 }, { "epoch": 5.39, "learning_rate": 8.55510950430779e-05, "loss": 0.1898, "step": 4554 }, { "epoch": 5.39, "learning_rate": 8.551373278032284e-05, "loss": 0.2023, "step": 4555 }, { "epoch": 5.39, "learning_rate": 8.547637258299348e-05, "loss": 0.186, "step": 4556 }, { "epoch": 5.4, "learning_rate": 8.54390144564166e-05, "loss": 0.1719, "step": 4557 }, { "epoch": 5.4, "learning_rate": 8.540165840591867e-05, "loss": 0.1735, "step": 4558 }, { "epoch": 5.4, "learning_rate": 8.53643044368258e-05, "loss": 0.1762, "step": 4559 }, { "epoch": 5.4, "learning_rate": 8.532695255446383e-05, "loss": 0.167, "step": 4560 }, { "epoch": 5.4, "learning_rate": 8.52896027641584e-05, "loss": 0.1851, "step": 4561 }, { "epoch": 5.4, "learning_rate": 8.525225507123471e-05, "loss": 0.1975, "step": 4562 }, { "epoch": 5.4, "learning_rate": 8.521490948101777e-05, "loss": 0.1665, "step": 4563 }, { "epoch": 5.4, "learning_rate": 8.517756599883224e-05, "loss": 0.1691, "step": 4564 }, { "epoch": 5.41, "learning_rate": 8.514022463000244e-05, "loss": 0.2015, "step": 4565 }, { "epoch": 5.41, "learning_rate": 8.51028853798525e-05, "loss": 0.1806, "step": 4566 }, { "epoch": 5.41, "learning_rate": 8.506554825370615e-05, "loss": 0.1743, "step": 4567 }, { "epoch": 5.41, "learning_rate": 8.502821325688684e-05, "loss": 0.1819, "step": 4568 }, { "epoch": 5.41, "learning_rate": 8.499088039471774e-05, "loss": 0.1787, "step": 4569 }, { "epoch": 5.41, "learning_rate": 8.495354967252169e-05, "loss": 0.1879, "step": 4570 }, { "epoch": 5.41, "learning_rate": 8.49162210956213e-05, "loss": 0.177, "step": 4571 }, { "epoch": 5.41, "learning_rate": 8.48788946693387e-05, "loss": 0.1779, "step": 4572 }, { "epoch": 5.42, "learning_rate": 8.48415703989959e-05, "loss": 0.1748, "step": 4573 }, { "epoch": 5.42, "learning_rate": 8.480424828991448e-05, "loss": 0.1807, "step": 4574 }, { "epoch": 5.42, "learning_rate": 8.476692834741585e-05, "loss": 0.1697, "step": 4575 }, { "epoch": 5.42, "learning_rate": 8.472961057682092e-05, "loss": 0.1974, "step": 4576 }, { "epoch": 5.42, "learning_rate": 8.469229498345042e-05, "loss": 0.1773, "step": 4577 }, { "epoch": 5.42, "learning_rate": 8.465498157262474e-05, "loss": 0.1737, "step": 4578 }, { "epoch": 5.42, "learning_rate": 8.4617670349664e-05, "loss": 0.1799, "step": 4579 }, { "epoch": 5.42, "learning_rate": 8.458036131988792e-05, "loss": 0.1753, "step": 4580 }, { "epoch": 5.42, "learning_rate": 8.454305448861595e-05, "loss": 0.1718, "step": 4581 }, { "epoch": 5.43, "learning_rate": 8.450574986116724e-05, "loss": 0.1736, "step": 4582 }, { "epoch": 5.43, "learning_rate": 8.446844744286068e-05, "loss": 0.1685, "step": 4583 }, { "epoch": 5.43, "learning_rate": 8.443114723901466e-05, "loss": 0.2072, "step": 4584 }, { "epoch": 5.43, "learning_rate": 8.439384925494743e-05, "loss": 0.5173, "step": 4585 }, { "epoch": 5.43, "learning_rate": 8.435655349597689e-05, "loss": 0.1836, "step": 4586 }, { "epoch": 5.43, "learning_rate": 8.431925996742065e-05, "loss": 0.2294, "step": 4587 }, { "epoch": 5.43, "learning_rate": 8.428196867459585e-05, "loss": 0.1875, "step": 4588 }, { "epoch": 5.43, "learning_rate": 8.424467962281945e-05, "loss": 0.1836, "step": 4589 }, { "epoch": 5.44, "learning_rate": 8.420739281740805e-05, "loss": 0.1751, "step": 4590 }, { "epoch": 5.44, "learning_rate": 8.417010826367799e-05, "loss": 0.1763, "step": 4591 }, { "epoch": 5.44, "learning_rate": 8.413282596694516e-05, "loss": 0.179, "step": 4592 }, { "epoch": 5.44, "learning_rate": 8.409554593252523e-05, "loss": 0.1869, "step": 4593 }, { "epoch": 5.44, "learning_rate": 8.405826816573353e-05, "loss": 0.2023, "step": 4594 }, { "epoch": 5.44, "learning_rate": 8.402099267188508e-05, "loss": 0.1727, "step": 4595 }, { "epoch": 5.44, "learning_rate": 8.398371945629448e-05, "loss": 0.1708, "step": 4596 }, { "epoch": 5.44, "learning_rate": 8.394644852427615e-05, "loss": 0.1766, "step": 4597 }, { "epoch": 5.45, "learning_rate": 8.390917988114406e-05, "loss": 0.1744, "step": 4598 }, { "epoch": 5.45, "eval_loss": 3.7694623470306396, "eval_runtime": 283.9246, "eval_samples_per_second": 0.726, "eval_steps_per_second": 0.726, "step": 4598 }, { "epoch": 5.45, "learning_rate": 8.387191353221198e-05, "loss": 0.186, "step": 4599 }, { "epoch": 5.45, "learning_rate": 8.383464948279319e-05, "loss": 0.2095, "step": 4600 }, { "epoch": 5.45, "learning_rate": 8.379738773820076e-05, "loss": 0.1839, "step": 4601 }, { "epoch": 5.45, "learning_rate": 8.37601283037474e-05, "loss": 0.1666, "step": 4602 }, { "epoch": 5.45, "learning_rate": 8.372287118474553e-05, "loss": 0.1732, "step": 4603 }, { "epoch": 5.45, "learning_rate": 8.368561638650717e-05, "loss": 0.176, "step": 4604 }, { "epoch": 5.45, "learning_rate": 8.364836391434402e-05, "loss": 0.1774, "step": 4605 }, { "epoch": 5.45, "learning_rate": 8.361111377356751e-05, "loss": 0.1795, "step": 4606 }, { "epoch": 5.46, "learning_rate": 8.35738659694887e-05, "loss": 0.1814, "step": 4607 }, { "epoch": 5.46, "learning_rate": 8.353662050741827e-05, "loss": 0.176, "step": 4608 }, { "epoch": 5.46, "learning_rate": 8.349937739266665e-05, "loss": 0.1716, "step": 4609 }, { "epoch": 5.46, "learning_rate": 8.346213663054387e-05, "loss": 0.1784, "step": 4610 }, { "epoch": 5.46, "learning_rate": 8.342489822635971e-05, "loss": 0.1961, "step": 4611 }, { "epoch": 5.46, "learning_rate": 8.338766218542347e-05, "loss": 0.1948, "step": 4612 }, { "epoch": 5.46, "learning_rate": 8.335042851304421e-05, "loss": 0.1819, "step": 4613 }, { "epoch": 5.46, "learning_rate": 8.33131972145307e-05, "loss": 0.172, "step": 4614 }, { "epoch": 5.47, "learning_rate": 8.327596829519132e-05, "loss": 0.1786, "step": 4615 }, { "epoch": 5.47, "learning_rate": 8.3238741760334e-05, "loss": 0.1888, "step": 4616 }, { "epoch": 5.47, "learning_rate": 8.32015176152665e-05, "loss": 0.1794, "step": 4617 }, { "epoch": 5.47, "learning_rate": 8.316429586529615e-05, "loss": 0.1766, "step": 4618 }, { "epoch": 5.47, "learning_rate": 8.312707651573e-05, "loss": 0.1726, "step": 4619 }, { "epoch": 5.47, "learning_rate": 8.308985957187466e-05, "loss": 0.1708, "step": 4620 }, { "epoch": 5.47, "learning_rate": 8.30526450390365e-05, "loss": 0.204, "step": 4621 }, { "epoch": 5.47, "learning_rate": 8.301543292252146e-05, "loss": 0.18, "step": 4622 }, { "epoch": 5.48, "learning_rate": 8.297822322763526e-05, "loss": 0.1852, "step": 4623 }, { "epoch": 5.48, "learning_rate": 8.294101595968304e-05, "loss": 0.1956, "step": 4624 }, { "epoch": 5.48, "learning_rate": 8.290381112396987e-05, "loss": 0.1782, "step": 4625 }, { "epoch": 5.48, "learning_rate": 8.286660872580032e-05, "loss": 0.1778, "step": 4626 }, { "epoch": 5.48, "learning_rate": 8.282940877047864e-05, "loss": 0.1855, "step": 4627 }, { "epoch": 5.48, "learning_rate": 8.279221126330874e-05, "loss": 0.176, "step": 4628 }, { "epoch": 5.48, "learning_rate": 8.275501620959414e-05, "loss": 0.1777, "step": 4629 }, { "epoch": 5.48, "learning_rate": 8.271782361463805e-05, "loss": 0.1753, "step": 4630 }, { "epoch": 5.48, "learning_rate": 8.268063348374334e-05, "loss": 0.1918, "step": 4631 }, { "epoch": 5.49, "learning_rate": 8.264344582221252e-05, "loss": 0.2044, "step": 4632 }, { "epoch": 5.49, "learning_rate": 8.260626063534772e-05, "loss": 0.1755, "step": 4633 }, { "epoch": 5.49, "learning_rate": 8.256907792845072e-05, "loss": 0.1666, "step": 4634 }, { "epoch": 5.49, "learning_rate": 8.253189770682301e-05, "loss": 0.1709, "step": 4635 }, { "epoch": 5.49, "learning_rate": 8.249471997576565e-05, "loss": 0.1813, "step": 4636 }, { "epoch": 5.49, "learning_rate": 8.245754474057937e-05, "loss": 0.1806, "step": 4637 }, { "epoch": 5.49, "learning_rate": 8.242037200656455e-05, "loss": 0.1723, "step": 4638 }, { "epoch": 5.49, "learning_rate": 8.238320177902121e-05, "loss": 0.1792, "step": 4639 }, { "epoch": 5.5, "learning_rate": 8.234603406324908e-05, "loss": 0.1744, "step": 4640 }, { "epoch": 5.5, "learning_rate": 8.230886886454734e-05, "loss": 0.1718, "step": 4641 }, { "epoch": 5.5, "learning_rate": 8.227170618821499e-05, "loss": 0.1783, "step": 4642 }, { "epoch": 5.5, "learning_rate": 8.223454603955065e-05, "loss": 0.1656, "step": 4643 }, { "epoch": 5.5, "learning_rate": 8.219738842385256e-05, "loss": 0.1761, "step": 4644 }, { "epoch": 5.5, "learning_rate": 8.216023334641851e-05, "loss": 0.1736, "step": 4645 }, { "epoch": 5.5, "learning_rate": 8.212308081254605e-05, "loss": 0.1811, "step": 4646 }, { "epoch": 5.5, "learning_rate": 8.208593082753232e-05, "loss": 0.1806, "step": 4647 }, { "epoch": 5.51, "learning_rate": 8.20487833966741e-05, "loss": 0.1751, "step": 4648 }, { "epoch": 5.51, "learning_rate": 8.201163852526779e-05, "loss": 0.1731, "step": 4649 }, { "epoch": 5.51, "learning_rate": 8.197449621860943e-05, "loss": 0.1744, "step": 4650 }, { "epoch": 5.51, "learning_rate": 8.193735648199472e-05, "loss": 0.1788, "step": 4651 }, { "epoch": 5.51, "learning_rate": 8.1900219320719e-05, "loss": 0.1692, "step": 4652 }, { "epoch": 5.51, "learning_rate": 8.186308474007716e-05, "loss": 0.1795, "step": 4653 }, { "epoch": 5.51, "learning_rate": 8.182595274536383e-05, "loss": 0.1855, "step": 4654 }, { "epoch": 5.51, "learning_rate": 8.178882334187319e-05, "loss": 0.177, "step": 4655 }, { "epoch": 5.52, "learning_rate": 8.175169653489915e-05, "loss": 0.1849, "step": 4656 }, { "epoch": 5.52, "learning_rate": 8.171457232973509e-05, "loss": 0.1645, "step": 4657 }, { "epoch": 5.52, "learning_rate": 8.167745073167415e-05, "loss": 0.1802, "step": 4658 }, { "epoch": 5.52, "learning_rate": 8.164033174600905e-05, "loss": 0.2054, "step": 4659 }, { "epoch": 5.52, "learning_rate": 8.16032153780322e-05, "loss": 0.1804, "step": 4660 }, { "epoch": 5.52, "learning_rate": 8.156610163303554e-05, "loss": 0.1704, "step": 4661 }, { "epoch": 5.52, "learning_rate": 8.152899051631065e-05, "loss": 0.1784, "step": 4662 }, { "epoch": 5.52, "learning_rate": 8.149188203314883e-05, "loss": 0.1733, "step": 4663 }, { "epoch": 5.52, "learning_rate": 8.145477618884093e-05, "loss": 0.1738, "step": 4664 }, { "epoch": 5.53, "learning_rate": 8.141767298867738e-05, "loss": 0.183, "step": 4665 }, { "epoch": 5.53, "learning_rate": 8.138057243794833e-05, "loss": 0.1715, "step": 4666 }, { "epoch": 5.53, "learning_rate": 8.134347454194351e-05, "loss": 0.1915, "step": 4667 }, { "epoch": 5.53, "learning_rate": 8.130637930595231e-05, "loss": 0.1755, "step": 4668 }, { "epoch": 5.53, "learning_rate": 8.12692867352636e-05, "loss": 0.1781, "step": 4669 }, { "epoch": 5.53, "learning_rate": 8.123219683516603e-05, "loss": 0.1676, "step": 4670 }, { "epoch": 5.53, "learning_rate": 8.119510961094778e-05, "loss": 0.1843, "step": 4671 }, { "epoch": 5.53, "learning_rate": 8.115802506789679e-05, "loss": 0.1848, "step": 4672 }, { "epoch": 5.54, "learning_rate": 8.112094321130035e-05, "loss": 0.1824, "step": 4673 }, { "epoch": 5.54, "learning_rate": 8.108386404644561e-05, "loss": 0.1844, "step": 4674 }, { "epoch": 5.54, "learning_rate": 8.104678757861925e-05, "loss": 0.1742, "step": 4675 }, { "epoch": 5.54, "learning_rate": 8.100971381310756e-05, "loss": 0.1749, "step": 4676 }, { "epoch": 5.54, "learning_rate": 8.097264275519642e-05, "loss": 0.1754, "step": 4677 }, { "epoch": 5.54, "learning_rate": 8.093557441017139e-05, "loss": 0.1897, "step": 4678 }, { "epoch": 5.54, "learning_rate": 8.089850878331758e-05, "loss": 0.2023, "step": 4679 }, { "epoch": 5.54, "learning_rate": 8.08614458799198e-05, "loss": 0.1795, "step": 4680 }, { "epoch": 5.55, "learning_rate": 8.082438570526232e-05, "loss": 0.1939, "step": 4681 }, { "epoch": 5.55, "learning_rate": 8.078732826462915e-05, "loss": 0.1919, "step": 4682 }, { "epoch": 5.55, "learning_rate": 8.075027356330391e-05, "loss": 0.1789, "step": 4683 }, { "epoch": 5.55, "learning_rate": 8.07132216065698e-05, "loss": 0.1798, "step": 4684 }, { "epoch": 5.55, "learning_rate": 8.067617239970952e-05, "loss": 0.1861, "step": 4685 }, { "epoch": 5.55, "learning_rate": 8.063912594800556e-05, "loss": 0.1874, "step": 4686 }, { "epoch": 5.55, "learning_rate": 8.060208225673992e-05, "loss": 0.2252, "step": 4687 }, { "epoch": 5.55, "learning_rate": 8.056504133119424e-05, "loss": 0.2101, "step": 4688 }, { "epoch": 5.55, "learning_rate": 8.052800317664971e-05, "loss": 0.1923, "step": 4689 }, { "epoch": 5.56, "learning_rate": 8.049096779838719e-05, "loss": 0.1833, "step": 4690 }, { "epoch": 5.56, "learning_rate": 8.04539352016871e-05, "loss": 0.1755, "step": 4691 }, { "epoch": 5.56, "learning_rate": 8.041690539182947e-05, "loss": 0.1772, "step": 4692 }, { "epoch": 5.56, "learning_rate": 8.037987837409402e-05, "loss": 0.1745, "step": 4693 }, { "epoch": 5.56, "learning_rate": 8.03428541537599e-05, "loss": 0.1801, "step": 4694 }, { "epoch": 5.56, "learning_rate": 8.0305832736106e-05, "loss": 0.1726, "step": 4695 }, { "epoch": 5.56, "learning_rate": 8.026881412641073e-05, "loss": 0.179, "step": 4696 }, { "epoch": 5.56, "learning_rate": 8.023179832995225e-05, "loss": 0.1757, "step": 4697 }, { "epoch": 5.57, "learning_rate": 8.019478535200806e-05, "loss": 0.1813, "step": 4698 }, { "epoch": 5.57, "learning_rate": 8.015777519785546e-05, "loss": 0.1899, "step": 4699 }, { "epoch": 5.57, "learning_rate": 8.01207678727713e-05, "loss": 0.1797, "step": 4700 }, { "epoch": 5.57, "learning_rate": 8.008376338203205e-05, "loss": 0.1836, "step": 4701 }, { "epoch": 5.57, "learning_rate": 8.004676173091368e-05, "loss": 0.179, "step": 4702 }, { "epoch": 5.57, "learning_rate": 8.000976292469183e-05, "loss": 0.1698, "step": 4703 }, { "epoch": 5.57, "learning_rate": 7.997276696864175e-05, "loss": 0.1911, "step": 4704 }, { "epoch": 5.57, "learning_rate": 7.993577386803827e-05, "loss": 0.1713, "step": 4705 }, { "epoch": 5.58, "learning_rate": 7.989878362815573e-05, "loss": 0.1673, "step": 4706 }, { "epoch": 5.58, "learning_rate": 7.98617962542682e-05, "loss": 0.191, "step": 4707 }, { "epoch": 5.58, "learning_rate": 7.982481175164923e-05, "loss": 0.1736, "step": 4708 }, { "epoch": 5.58, "learning_rate": 7.978783012557207e-05, "loss": 0.1799, "step": 4709 }, { "epoch": 5.58, "learning_rate": 7.975085138130938e-05, "loss": 0.1757, "step": 4710 }, { "epoch": 5.58, "learning_rate": 7.971387552413361e-05, "loss": 0.1794, "step": 4711 }, { "epoch": 5.58, "learning_rate": 7.967690255931667e-05, "loss": 0.1823, "step": 4712 }, { "epoch": 5.58, "learning_rate": 7.963993249213017e-05, "loss": 0.1753, "step": 4713 }, { "epoch": 5.58, "learning_rate": 7.960296532784515e-05, "loss": 0.1684, "step": 4714 }, { "epoch": 5.59, "learning_rate": 7.956600107173233e-05, "loss": 0.1693, "step": 4715 }, { "epoch": 5.59, "learning_rate": 7.952903972906204e-05, "loss": 0.1835, "step": 4716 }, { "epoch": 5.59, "learning_rate": 7.949208130510417e-05, "loss": 0.1808, "step": 4717 }, { "epoch": 5.59, "learning_rate": 7.945512580512813e-05, "loss": 0.183, "step": 4718 }, { "epoch": 5.59, "learning_rate": 7.941817323440302e-05, "loss": 0.1765, "step": 4719 }, { "epoch": 5.59, "learning_rate": 7.938122359819746e-05, "loss": 0.1776, "step": 4720 }, { "epoch": 5.59, "learning_rate": 7.934427690177965e-05, "loss": 0.1898, "step": 4721 }, { "epoch": 5.59, "learning_rate": 7.930733315041739e-05, "loss": 0.187, "step": 4722 }, { "epoch": 5.6, "learning_rate": 7.927039234937804e-05, "loss": 0.1785, "step": 4723 }, { "epoch": 5.6, "learning_rate": 7.923345450392856e-05, "loss": 0.1768, "step": 4724 }, { "epoch": 5.6, "learning_rate": 7.919651961933553e-05, "loss": 0.1766, "step": 4725 }, { "epoch": 5.6, "learning_rate": 7.915958770086498e-05, "loss": 0.1743, "step": 4726 }, { "epoch": 5.6, "learning_rate": 7.912265875378262e-05, "loss": 0.1763, "step": 4727 }, { "epoch": 5.6, "learning_rate": 7.908573278335371e-05, "loss": 0.1819, "step": 4728 }, { "epoch": 5.6, "learning_rate": 7.904880979484315e-05, "loss": 0.1785, "step": 4729 }, { "epoch": 5.6, "learning_rate": 7.901188979351526e-05, "loss": 0.1717, "step": 4730 }, { "epoch": 5.61, "learning_rate": 7.897497278463409e-05, "loss": 0.1737, "step": 4731 }, { "epoch": 5.61, "learning_rate": 7.893805877346316e-05, "loss": 0.1755, "step": 4732 }, { "epoch": 5.61, "learning_rate": 7.890114776526564e-05, "loss": 0.1739, "step": 4733 }, { "epoch": 5.61, "learning_rate": 7.88642397653042e-05, "loss": 0.1754, "step": 4734 }, { "epoch": 5.61, "learning_rate": 7.882733477884115e-05, "loss": 0.1772, "step": 4735 }, { "epoch": 5.61, "learning_rate": 7.87904328111383e-05, "loss": 0.1818, "step": 4736 }, { "epoch": 5.61, "learning_rate": 7.875353386745713e-05, "loss": 0.1781, "step": 4737 }, { "epoch": 5.61, "learning_rate": 7.871663795305855e-05, "loss": 0.1714, "step": 4738 }, { "epoch": 5.61, "learning_rate": 7.867974507320311e-05, "loss": 0.1952, "step": 4739 }, { "epoch": 5.62, "learning_rate": 7.864285523315096e-05, "loss": 0.1803, "step": 4740 }, { "epoch": 5.62, "learning_rate": 7.860596843816187e-05, "loss": 0.1843, "step": 4741 }, { "epoch": 5.62, "learning_rate": 7.856908469349495e-05, "loss": 0.1907, "step": 4742 }, { "epoch": 5.62, "learning_rate": 7.853220400440907e-05, "loss": 0.1848, "step": 4743 }, { "epoch": 5.62, "learning_rate": 7.849532637616264e-05, "loss": 0.1849, "step": 4744 }, { "epoch": 5.62, "learning_rate": 7.845845181401358e-05, "loss": 0.1878, "step": 4745 }, { "epoch": 5.62, "learning_rate": 7.84215803232194e-05, "loss": 0.1849, "step": 4746 }, { "epoch": 5.62, "learning_rate": 7.838471190903717e-05, "loss": 0.1778, "step": 4747 }, { "epoch": 5.63, "learning_rate": 7.834784657672353e-05, "loss": 0.1925, "step": 4748 }, { "epoch": 5.63, "learning_rate": 7.831098433153467e-05, "loss": 0.18, "step": 4749 }, { "epoch": 5.63, "learning_rate": 7.827412517872634e-05, "loss": 0.1823, "step": 4750 }, { "epoch": 5.63, "learning_rate": 7.823726912355384e-05, "loss": 0.1821, "step": 4751 }, { "epoch": 5.63, "learning_rate": 7.820041617127205e-05, "loss": 0.1732, "step": 4752 }, { "epoch": 5.63, "learning_rate": 7.816356632713545e-05, "loss": 0.1758, "step": 4753 }, { "epoch": 5.63, "learning_rate": 7.812671959639791e-05, "loss": 0.1701, "step": 4754 }, { "epoch": 5.63, "learning_rate": 7.808987598431303e-05, "loss": 0.1795, "step": 4755 }, { "epoch": 5.64, "learning_rate": 7.805303549613392e-05, "loss": 0.1732, "step": 4756 }, { "epoch": 5.64, "learning_rate": 7.80161981371132e-05, "loss": 0.1791, "step": 4757 }, { "epoch": 5.64, "learning_rate": 7.797936391250314e-05, "loss": 0.2083, "step": 4758 }, { "epoch": 5.64, "learning_rate": 7.794253282755541e-05, "loss": 0.1894, "step": 4759 }, { "epoch": 5.64, "learning_rate": 7.790570488752135e-05, "loss": 0.1774, "step": 4760 }, { "epoch": 5.64, "learning_rate": 7.786888009765185e-05, "loss": 0.1696, "step": 4761 }, { "epoch": 5.64, "learning_rate": 7.78320584631973e-05, "loss": 0.1693, "step": 4762 }, { "epoch": 5.64, "learning_rate": 7.779523998940766e-05, "loss": 0.1744, "step": 4763 }, { "epoch": 5.64, "learning_rate": 7.775842468153242e-05, "loss": 0.176, "step": 4764 }, { "epoch": 5.65, "learning_rate": 7.772161254482068e-05, "loss": 0.1683, "step": 4765 }, { "epoch": 5.65, "learning_rate": 7.768480358452107e-05, "loss": 0.1765, "step": 4766 }, { "epoch": 5.65, "learning_rate": 7.764799780588164e-05, "loss": 0.1836, "step": 4767 }, { "epoch": 5.65, "learning_rate": 7.761119521415016e-05, "loss": 0.1803, "step": 4768 }, { "epoch": 5.65, "learning_rate": 7.757439581457388e-05, "loss": 0.1745, "step": 4769 }, { "epoch": 5.65, "learning_rate": 7.753759961239964e-05, "loss": 0.1705, "step": 4770 }, { "epoch": 5.65, "learning_rate": 7.750080661287366e-05, "loss": 0.1818, "step": 4771 }, { "epoch": 5.65, "learning_rate": 7.74640168212419e-05, "loss": 0.1837, "step": 4772 }, { "epoch": 5.66, "learning_rate": 7.742723024274974e-05, "loss": 0.1956, "step": 4773 }, { "epoch": 5.66, "learning_rate": 7.73904468826422e-05, "loss": 0.1773, "step": 4774 }, { "epoch": 5.66, "learning_rate": 7.735366674616372e-05, "loss": 0.1798, "step": 4775 }, { "epoch": 5.66, "learning_rate": 7.731688983855838e-05, "loss": 0.1771, "step": 4776 }, { "epoch": 5.66, "learning_rate": 7.728011616506976e-05, "loss": 0.1868, "step": 4777 }, { "epoch": 5.66, "learning_rate": 7.7243345730941e-05, "loss": 0.1735, "step": 4778 }, { "epoch": 5.66, "learning_rate": 7.720657854141475e-05, "loss": 0.1727, "step": 4779 }, { "epoch": 5.66, "learning_rate": 7.716981460173319e-05, "loss": 0.1735, "step": 4780 }, { "epoch": 5.67, "learning_rate": 7.713305391713806e-05, "loss": 0.1809, "step": 4781 }, { "epoch": 5.67, "learning_rate": 7.709629649287068e-05, "loss": 0.2148, "step": 4782 }, { "epoch": 5.67, "learning_rate": 7.705954233417179e-05, "loss": 0.1791, "step": 4783 }, { "epoch": 5.67, "learning_rate": 7.702279144628174e-05, "loss": 0.1848, "step": 4784 }, { "epoch": 5.67, "learning_rate": 7.698604383444044e-05, "loss": 0.1745, "step": 4785 }, { "epoch": 5.67, "learning_rate": 7.694929950388732e-05, "loss": 0.1702, "step": 4786 }, { "epoch": 5.67, "learning_rate": 7.691255845986124e-05, "loss": 0.1764, "step": 4787 }, { "epoch": 5.67, "learning_rate": 7.687582070760073e-05, "loss": 0.171, "step": 4788 }, { "epoch": 5.67, "learning_rate": 7.683908625234376e-05, "loss": 0.1764, "step": 4789 }, { "epoch": 5.68, "learning_rate": 7.68023550993279e-05, "loss": 0.1831, "step": 4790 }, { "epoch": 5.68, "learning_rate": 7.676562725379018e-05, "loss": 0.1914, "step": 4791 }, { "epoch": 5.68, "learning_rate": 7.67289027209672e-05, "loss": 0.1752, "step": 4792 }, { "epoch": 5.68, "learning_rate": 7.669218150609507e-05, "loss": 0.1706, "step": 4793 }, { "epoch": 5.68, "learning_rate": 7.66554636144095e-05, "loss": 0.176, "step": 4794 }, { "epoch": 5.68, "learning_rate": 7.661874905114554e-05, "loss": 0.1786, "step": 4795 }, { "epoch": 5.68, "learning_rate": 7.658203782153793e-05, "loss": 0.1759, "step": 4796 }, { "epoch": 5.68, "learning_rate": 7.654532993082092e-05, "loss": 0.1718, "step": 4797 }, { "epoch": 5.69, "learning_rate": 7.650862538422831e-05, "loss": 0.1704, "step": 4798 }, { "epoch": 5.69, "learning_rate": 7.647192418699324e-05, "loss": 0.1814, "step": 4799 }, { "epoch": 5.69, "learning_rate": 7.643522634434856e-05, "loss": 0.1762, "step": 4800 }, { "epoch": 5.69, "learning_rate": 7.639853186152659e-05, "loss": 0.1777, "step": 4801 }, { "epoch": 5.69, "learning_rate": 7.636184074375917e-05, "loss": 0.1763, "step": 4802 }, { "epoch": 5.69, "learning_rate": 7.632515299627763e-05, "loss": 0.1798, "step": 4803 }, { "epoch": 5.69, "learning_rate": 7.628846862431283e-05, "loss": 0.1874, "step": 4804 }, { "epoch": 5.69, "learning_rate": 7.625178763309519e-05, "loss": 0.1853, "step": 4805 }, { "epoch": 5.7, "learning_rate": 7.621511002785467e-05, "loss": 0.1894, "step": 4806 }, { "epoch": 5.7, "learning_rate": 7.617843581382055e-05, "loss": 0.1831, "step": 4807 }, { "epoch": 5.7, "eval_loss": 3.838498115539551, "eval_runtime": 283.7734, "eval_samples_per_second": 0.726, "eval_steps_per_second": 0.726, "step": 4807 }, { "epoch": 5.7, "learning_rate": 7.614176499622189e-05, "loss": 0.1784, "step": 4808 }, { "epoch": 5.7, "learning_rate": 7.610509758028712e-05, "loss": 0.1802, "step": 4809 }, { "epoch": 5.7, "learning_rate": 7.606843357124426e-05, "loss": 0.173, "step": 4810 }, { "epoch": 5.7, "learning_rate": 7.603177297432069e-05, "loss": 0.187, "step": 4811 }, { "epoch": 5.7, "learning_rate": 7.59951157947435e-05, "loss": 0.1684, "step": 4812 }, { "epoch": 5.7, "learning_rate": 7.595846203773916e-05, "loss": 0.1786, "step": 4813 }, { "epoch": 5.7, "learning_rate": 7.592181170853373e-05, "loss": 0.171, "step": 4814 }, { "epoch": 5.71, "learning_rate": 7.588516481235271e-05, "loss": 0.1713, "step": 4815 }, { "epoch": 5.71, "learning_rate": 7.584852135442118e-05, "loss": 0.1772, "step": 4816 }, { "epoch": 5.71, "learning_rate": 7.581188133996368e-05, "loss": 0.1921, "step": 4817 }, { "epoch": 5.71, "learning_rate": 7.57752447742043e-05, "loss": 0.172, "step": 4818 }, { "epoch": 5.71, "learning_rate": 7.573861166236658e-05, "loss": 0.1688, "step": 4819 }, { "epoch": 5.71, "learning_rate": 7.570198200967362e-05, "loss": 0.1703, "step": 4820 }, { "epoch": 5.71, "learning_rate": 7.5665355821348e-05, "loss": 0.1765, "step": 4821 }, { "epoch": 5.71, "learning_rate": 7.562873310261183e-05, "loss": 0.1764, "step": 4822 }, { "epoch": 5.72, "learning_rate": 7.559211385868677e-05, "loss": 0.1923, "step": 4823 }, { "epoch": 5.72, "learning_rate": 7.55554980947938e-05, "loss": 0.1832, "step": 4824 }, { "epoch": 5.72, "learning_rate": 7.551888581615357e-05, "loss": 0.176, "step": 4825 }, { "epoch": 5.72, "learning_rate": 7.548227702798624e-05, "loss": 0.1836, "step": 4826 }, { "epoch": 5.72, "learning_rate": 7.544567173551143e-05, "loss": 0.1821, "step": 4827 }, { "epoch": 5.72, "learning_rate": 7.54090699439482e-05, "loss": 0.1685, "step": 4828 }, { "epoch": 5.72, "learning_rate": 7.537247165851518e-05, "loss": 0.1979, "step": 4829 }, { "epoch": 5.72, "learning_rate": 7.533587688443049e-05, "loss": 0.1765, "step": 4830 }, { "epoch": 5.73, "learning_rate": 7.529928562691178e-05, "loss": 0.1691, "step": 4831 }, { "epoch": 5.73, "learning_rate": 7.526269789117612e-05, "loss": 0.192, "step": 4832 }, { "epoch": 5.73, "learning_rate": 7.522611368244016e-05, "loss": 0.178, "step": 4833 }, { "epoch": 5.73, "learning_rate": 7.518953300591997e-05, "loss": 0.1803, "step": 4834 }, { "epoch": 5.73, "learning_rate": 7.515295586683122e-05, "loss": 0.1909, "step": 4835 }, { "epoch": 5.73, "learning_rate": 7.511638227038894e-05, "loss": 0.1912, "step": 4836 }, { "epoch": 5.73, "learning_rate": 7.507981222180776e-05, "loss": 0.172, "step": 4837 }, { "epoch": 5.73, "learning_rate": 7.504324572630177e-05, "loss": 0.1784, "step": 4838 }, { "epoch": 5.73, "learning_rate": 7.500668278908461e-05, "loss": 0.1769, "step": 4839 }, { "epoch": 5.74, "learning_rate": 7.497012341536924e-05, "loss": 0.171, "step": 4840 }, { "epoch": 5.74, "learning_rate": 7.493356761036829e-05, "loss": 0.1712, "step": 4841 }, { "epoch": 5.74, "learning_rate": 7.489701537929384e-05, "loss": 0.1866, "step": 4842 }, { "epoch": 5.74, "learning_rate": 7.486046672735743e-05, "loss": 0.1782, "step": 4843 }, { "epoch": 5.74, "learning_rate": 7.482392165977008e-05, "loss": 0.1797, "step": 4844 }, { "epoch": 5.74, "learning_rate": 7.478738018174234e-05, "loss": 0.1795, "step": 4845 }, { "epoch": 5.74, "learning_rate": 7.47508422984842e-05, "loss": 0.1756, "step": 4846 }, { "epoch": 5.74, "learning_rate": 7.471430801520522e-05, "loss": 0.1957, "step": 4847 }, { "epoch": 5.75, "learning_rate": 7.467777733711434e-05, "loss": 0.1782, "step": 4848 }, { "epoch": 5.75, "learning_rate": 7.464125026942003e-05, "loss": 0.1986, "step": 4849 }, { "epoch": 5.75, "learning_rate": 7.460472681733031e-05, "loss": 0.1792, "step": 4850 }, { "epoch": 5.75, "learning_rate": 7.456820698605263e-05, "loss": 0.1784, "step": 4851 }, { "epoch": 5.75, "learning_rate": 7.453169078079382e-05, "loss": 0.1795, "step": 4852 }, { "epoch": 5.75, "learning_rate": 7.44951782067604e-05, "loss": 0.1843, "step": 4853 }, { "epoch": 5.75, "learning_rate": 7.445866926915818e-05, "loss": 0.1772, "step": 4854 }, { "epoch": 5.75, "learning_rate": 7.442216397319266e-05, "loss": 0.1709, "step": 4855 }, { "epoch": 5.76, "learning_rate": 7.438566232406858e-05, "loss": 0.1707, "step": 4856 }, { "epoch": 5.76, "learning_rate": 7.434916432699033e-05, "loss": 0.1753, "step": 4857 }, { "epoch": 5.76, "learning_rate": 7.431266998716171e-05, "loss": 0.1781, "step": 4858 }, { "epoch": 5.76, "learning_rate": 7.427617930978606e-05, "loss": 0.1829, "step": 4859 }, { "epoch": 5.76, "learning_rate": 7.423969230006609e-05, "loss": 0.1949, "step": 4860 }, { "epoch": 5.76, "learning_rate": 7.42032089632041e-05, "loss": 0.1678, "step": 4861 }, { "epoch": 5.76, "learning_rate": 7.41667293044018e-05, "loss": 0.1919, "step": 4862 }, { "epoch": 5.76, "learning_rate": 7.413025332886044e-05, "loss": 0.1812, "step": 4863 }, { "epoch": 5.76, "learning_rate": 7.409378104178059e-05, "loss": 0.1851, "step": 4864 }, { "epoch": 5.77, "learning_rate": 7.40573124483625e-05, "loss": 0.1688, "step": 4865 }, { "epoch": 5.77, "learning_rate": 7.402084755380574e-05, "loss": 0.2043, "step": 4866 }, { "epoch": 5.77, "learning_rate": 7.398438636330948e-05, "loss": 0.1748, "step": 4867 }, { "epoch": 5.77, "learning_rate": 7.394792888207221e-05, "loss": 0.1867, "step": 4868 }, { "epoch": 5.77, "learning_rate": 7.391147511529202e-05, "loss": 0.1745, "step": 4869 }, { "epoch": 5.77, "learning_rate": 7.387502506816638e-05, "loss": 0.1715, "step": 4870 }, { "epoch": 5.77, "learning_rate": 7.383857874589232e-05, "loss": 0.1789, "step": 4871 }, { "epoch": 5.77, "learning_rate": 7.380213615366627e-05, "loss": 0.1724, "step": 4872 }, { "epoch": 5.78, "learning_rate": 7.376569729668413e-05, "loss": 0.181, "step": 4873 }, { "epoch": 5.78, "learning_rate": 7.372926218014131e-05, "loss": 0.1782, "step": 4874 }, { "epoch": 5.78, "learning_rate": 7.369283080923269e-05, "loss": 0.1729, "step": 4875 }, { "epoch": 5.78, "learning_rate": 7.36564031891525e-05, "loss": 0.1832, "step": 4876 }, { "epoch": 5.78, "learning_rate": 7.361997932509461e-05, "loss": 0.1864, "step": 4877 }, { "epoch": 5.78, "learning_rate": 7.358355922225222e-05, "loss": 0.1905, "step": 4878 }, { "epoch": 5.78, "learning_rate": 7.35471428858181e-05, "loss": 0.1911, "step": 4879 }, { "epoch": 5.78, "learning_rate": 7.351073032098437e-05, "loss": 0.1809, "step": 4880 }, { "epoch": 5.79, "learning_rate": 7.347432153294265e-05, "loss": 0.1757, "step": 4881 }, { "epoch": 5.79, "learning_rate": 7.34379165268841e-05, "loss": 0.1751, "step": 4882 }, { "epoch": 5.79, "learning_rate": 7.340151530799926e-05, "loss": 0.1772, "step": 4883 }, { "epoch": 5.79, "learning_rate": 7.336511788147811e-05, "loss": 0.1763, "step": 4884 }, { "epoch": 5.79, "learning_rate": 7.332872425251018e-05, "loss": 0.1721, "step": 4885 }, { "epoch": 5.79, "learning_rate": 7.329233442628437e-05, "loss": 0.1848, "step": 4886 }, { "epoch": 5.79, "learning_rate": 7.325594840798911e-05, "loss": 0.1752, "step": 4887 }, { "epoch": 5.79, "learning_rate": 7.321956620281223e-05, "loss": 0.1905, "step": 4888 }, { "epoch": 5.79, "learning_rate": 7.318318781594106e-05, "loss": 0.1787, "step": 4889 }, { "epoch": 5.8, "learning_rate": 7.314681325256232e-05, "loss": 0.1739, "step": 4890 }, { "epoch": 5.8, "learning_rate": 7.311044251786227e-05, "loss": 0.171, "step": 4891 }, { "epoch": 5.8, "learning_rate": 7.307407561702662e-05, "loss": 0.1822, "step": 4892 }, { "epoch": 5.8, "learning_rate": 7.303771255524038e-05, "loss": 0.1766, "step": 4893 }, { "epoch": 5.8, "learning_rate": 7.300135333768821e-05, "loss": 0.1742, "step": 4894 }, { "epoch": 5.8, "learning_rate": 7.296499796955414e-05, "loss": 0.1805, "step": 4895 }, { "epoch": 5.8, "learning_rate": 7.292864645602169e-05, "loss": 0.1762, "step": 4896 }, { "epoch": 5.8, "learning_rate": 7.28922988022737e-05, "loss": 0.1718, "step": 4897 }, { "epoch": 5.81, "learning_rate": 7.285595501349258e-05, "loss": 0.175, "step": 4898 }, { "epoch": 5.81, "learning_rate": 7.28196150948602e-05, "loss": 0.1846, "step": 4899 }, { "epoch": 5.81, "learning_rate": 7.278327905155783e-05, "loss": 0.1742, "step": 4900 }, { "epoch": 5.81, "learning_rate": 7.274694688876616e-05, "loss": 0.179, "step": 4901 }, { "epoch": 5.81, "learning_rate": 7.271061861166539e-05, "loss": 0.1791, "step": 4902 }, { "epoch": 5.81, "learning_rate": 7.267429422543514e-05, "loss": 0.1721, "step": 4903 }, { "epoch": 5.81, "learning_rate": 7.263797373525451e-05, "loss": 0.1768, "step": 4904 }, { "epoch": 5.81, "learning_rate": 7.260165714630195e-05, "loss": 0.1778, "step": 4905 }, { "epoch": 5.82, "learning_rate": 7.256534446375542e-05, "loss": 0.1717, "step": 4906 }, { "epoch": 5.82, "learning_rate": 7.252903569279235e-05, "loss": 0.1813, "step": 4907 }, { "epoch": 5.82, "learning_rate": 7.249273083858961e-05, "loss": 0.1715, "step": 4908 }, { "epoch": 5.82, "learning_rate": 7.245642990632338e-05, "loss": 0.1691, "step": 4909 }, { "epoch": 5.82, "learning_rate": 7.242013290116944e-05, "loss": 0.1732, "step": 4910 }, { "epoch": 5.82, "learning_rate": 7.238383982830292e-05, "loss": 0.1861, "step": 4911 }, { "epoch": 5.82, "learning_rate": 7.23475506928985e-05, "loss": 0.2166, "step": 4912 }, { "epoch": 5.82, "learning_rate": 7.231126550013015e-05, "loss": 0.1835, "step": 4913 }, { "epoch": 5.82, "learning_rate": 7.227498425517134e-05, "loss": 0.1663, "step": 4914 }, { "epoch": 5.83, "learning_rate": 7.2238706963195e-05, "loss": 0.1761, "step": 4915 }, { "epoch": 5.83, "learning_rate": 7.220243362937352e-05, "loss": 0.1767, "step": 4916 }, { "epoch": 5.83, "learning_rate": 7.216616425887863e-05, "loss": 0.1745, "step": 4917 }, { "epoch": 5.83, "learning_rate": 7.212989885688157e-05, "loss": 0.1798, "step": 4918 }, { "epoch": 5.83, "learning_rate": 7.209363742855302e-05, "loss": 0.1746, "step": 4919 }, { "epoch": 5.83, "learning_rate": 7.205737997906307e-05, "loss": 0.1923, "step": 4920 }, { "epoch": 5.83, "learning_rate": 7.202112651358117e-05, "loss": 0.189, "step": 4921 }, { "epoch": 5.83, "learning_rate": 7.198487703727632e-05, "loss": 0.1773, "step": 4922 }, { "epoch": 5.84, "learning_rate": 7.194863155531692e-05, "loss": 0.1739, "step": 4923 }, { "epoch": 5.84, "learning_rate": 7.191239007287081e-05, "loss": 0.1778, "step": 4924 }, { "epoch": 5.84, "learning_rate": 7.187615259510516e-05, "loss": 0.1753, "step": 4925 }, { "epoch": 5.84, "learning_rate": 7.183991912718669e-05, "loss": 0.1735, "step": 4926 }, { "epoch": 5.84, "learning_rate": 7.180368967428149e-05, "loss": 0.1783, "step": 4927 }, { "epoch": 5.84, "learning_rate": 7.176746424155512e-05, "loss": 0.1808, "step": 4928 }, { "epoch": 5.84, "learning_rate": 7.173124283417247e-05, "loss": 0.1768, "step": 4929 }, { "epoch": 5.84, "learning_rate": 7.169502545729797e-05, "loss": 0.1766, "step": 4930 }, { "epoch": 5.85, "learning_rate": 7.165881211609543e-05, "loss": 0.2007, "step": 4931 }, { "epoch": 5.85, "learning_rate": 7.162260281572808e-05, "loss": 0.1848, "step": 4932 }, { "epoch": 5.85, "learning_rate": 7.158639756135854e-05, "loss": 0.1824, "step": 4933 }, { "epoch": 5.85, "learning_rate": 7.155019635814894e-05, "loss": 0.1754, "step": 4934 }, { "epoch": 5.85, "learning_rate": 7.151399921126075e-05, "loss": 0.1852, "step": 4935 }, { "epoch": 5.85, "learning_rate": 7.147780612585495e-05, "loss": 0.1667, "step": 4936 }, { "epoch": 5.85, "learning_rate": 7.144161710709178e-05, "loss": 0.1794, "step": 4937 }, { "epoch": 5.85, "learning_rate": 7.14054321601311e-05, "loss": 0.1644, "step": 4938 }, { "epoch": 5.85, "learning_rate": 7.136925129013203e-05, "loss": 0.1897, "step": 4939 }, { "epoch": 5.86, "learning_rate": 7.133307450225322e-05, "loss": 0.1778, "step": 4940 }, { "epoch": 5.86, "learning_rate": 7.129690180165266e-05, "loss": 0.1793, "step": 4941 }, { "epoch": 5.86, "learning_rate": 7.12607331934878e-05, "loss": 0.1799, "step": 4942 }, { "epoch": 5.86, "learning_rate": 7.122456868291548e-05, "loss": 0.1688, "step": 4943 }, { "epoch": 5.86, "learning_rate": 7.118840827509201e-05, "loss": 0.1713, "step": 4944 }, { "epoch": 5.86, "learning_rate": 7.115225197517304e-05, "loss": 0.1786, "step": 4945 }, { "epoch": 5.86, "learning_rate": 7.111609978831367e-05, "loss": 0.1736, "step": 4946 }, { "epoch": 5.86, "learning_rate": 7.107995171966842e-05, "loss": 0.1861, "step": 4947 }, { "epoch": 5.87, "learning_rate": 7.104380777439127e-05, "loss": 0.1745, "step": 4948 }, { "epoch": 5.87, "learning_rate": 7.100766795763546e-05, "loss": 0.1896, "step": 4949 }, { "epoch": 5.87, "learning_rate": 7.097153227455379e-05, "loss": 0.1737, "step": 4950 }, { "epoch": 5.87, "learning_rate": 7.093540073029839e-05, "loss": 0.2093, "step": 4951 }, { "epoch": 5.87, "learning_rate": 7.089927333002086e-05, "loss": 0.1994, "step": 4952 }, { "epoch": 5.87, "learning_rate": 7.086315007887225e-05, "loss": 0.1794, "step": 4953 }, { "epoch": 5.87, "learning_rate": 7.082703098200282e-05, "loss": 0.1797, "step": 4954 }, { "epoch": 5.87, "learning_rate": 7.079091604456241e-05, "loss": 0.166, "step": 4955 }, { "epoch": 5.88, "learning_rate": 7.075480527170024e-05, "loss": 0.1787, "step": 4956 }, { "epoch": 5.88, "learning_rate": 7.071869866856493e-05, "loss": 0.1801, "step": 4957 }, { "epoch": 5.88, "learning_rate": 7.068259624030444e-05, "loss": 0.1689, "step": 4958 }, { "epoch": 5.88, "learning_rate": 7.064649799206625e-05, "loss": 0.1789, "step": 4959 }, { "epoch": 5.88, "learning_rate": 7.061040392899712e-05, "loss": 0.174, "step": 4960 }, { "epoch": 5.88, "learning_rate": 7.057431405624335e-05, "loss": 0.1745, "step": 4961 }, { "epoch": 5.88, "learning_rate": 7.053822837895051e-05, "loss": 0.1763, "step": 4962 }, { "epoch": 5.88, "learning_rate": 7.050214690226364e-05, "loss": 0.1777, "step": 4963 }, { "epoch": 5.88, "learning_rate": 7.04660696313272e-05, "loss": 0.1846, "step": 4964 }, { "epoch": 5.89, "learning_rate": 7.042999657128503e-05, "loss": 0.1842, "step": 4965 }, { "epoch": 5.89, "learning_rate": 7.03939277272803e-05, "loss": 0.1839, "step": 4966 }, { "epoch": 5.89, "learning_rate": 7.035786310445567e-05, "loss": 0.1716, "step": 4967 }, { "epoch": 5.89, "learning_rate": 7.032180270795317e-05, "loss": 0.2148, "step": 4968 }, { "epoch": 5.89, "learning_rate": 7.028574654291426e-05, "loss": 0.1774, "step": 4969 }, { "epoch": 5.89, "learning_rate": 7.024969461447972e-05, "loss": 0.1759, "step": 4970 }, { "epoch": 5.89, "learning_rate": 7.021364692778977e-05, "loss": 0.1776, "step": 4971 }, { "epoch": 5.89, "learning_rate": 7.017760348798403e-05, "loss": 0.1946, "step": 4972 }, { "epoch": 5.9, "learning_rate": 7.014156430020153e-05, "loss": 0.1739, "step": 4973 }, { "epoch": 5.9, "learning_rate": 7.010552936958066e-05, "loss": 0.1983, "step": 4974 }, { "epoch": 5.9, "learning_rate": 7.006949870125919e-05, "loss": 0.1863, "step": 4975 }, { "epoch": 5.9, "learning_rate": 7.003347230037433e-05, "loss": 0.1755, "step": 4976 }, { "epoch": 5.9, "learning_rate": 6.99974501720627e-05, "loss": 0.1776, "step": 4977 }, { "epoch": 5.9, "learning_rate": 6.99614323214602e-05, "loss": 0.1922, "step": 4978 }, { "epoch": 5.9, "learning_rate": 6.992541875370217e-05, "loss": 0.1898, "step": 4979 }, { "epoch": 5.9, "learning_rate": 6.988940947392344e-05, "loss": 0.1818, "step": 4980 }, { "epoch": 5.91, "learning_rate": 6.985340448725815e-05, "loss": 0.1911, "step": 4981 }, { "epoch": 5.91, "learning_rate": 6.981740379883974e-05, "loss": 0.1688, "step": 4982 }, { "epoch": 5.91, "learning_rate": 6.97814074138012e-05, "loss": 0.1917, "step": 4983 }, { "epoch": 5.91, "learning_rate": 6.974541533727476e-05, "loss": 0.1944, "step": 4984 }, { "epoch": 5.91, "learning_rate": 6.97094275743922e-05, "loss": 0.187, "step": 4985 }, { "epoch": 5.91, "learning_rate": 6.967344413028452e-05, "loss": 0.1714, "step": 4986 }, { "epoch": 5.91, "learning_rate": 6.963746501008217e-05, "loss": 0.1714, "step": 4987 }, { "epoch": 5.91, "learning_rate": 6.9601490218915e-05, "loss": 0.1814, "step": 4988 }, { "epoch": 5.91, "learning_rate": 6.95655197619123e-05, "loss": 0.1749, "step": 4989 }, { "epoch": 5.92, "learning_rate": 6.952955364420255e-05, "loss": 0.1793, "step": 4990 }, { "epoch": 5.92, "learning_rate": 6.94935918709138e-05, "loss": 0.17, "step": 4991 }, { "epoch": 5.92, "learning_rate": 6.945763444717341e-05, "loss": 0.1717, "step": 4992 }, { "epoch": 5.92, "learning_rate": 6.942168137810818e-05, "loss": 0.1876, "step": 4993 }, { "epoch": 5.92, "learning_rate": 6.938573266884413e-05, "loss": 0.1788, "step": 4994 }, { "epoch": 5.92, "learning_rate": 6.93497883245068e-05, "loss": 0.1773, "step": 4995 }, { "epoch": 5.92, "learning_rate": 6.931384835022109e-05, "loss": 0.1843, "step": 4996 }, { "epoch": 5.92, "learning_rate": 6.927791275111126e-05, "loss": 0.1758, "step": 4997 }, { "epoch": 5.93, "learning_rate": 6.924198153230091e-05, "loss": 0.1831, "step": 4998 } ], "logging_steps": 1, "max_steps": 8330, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 833, "total_flos": 1.7518364490599498e+19, "train_batch_size": 1, "trial_name": null, "trial_params": null }