{ "best_metric": 1.4208089113235474, "best_model_checkpoint": "AlexWang99/byt5_re_1k/checkpoint-362", "epoch": 181.0, "eval_steps": 500, "global_step": 362, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_loss": 7.775562286376953, "eval_runtime": 10.6848, "eval_samples_per_second": 935.909, "eval_steps_per_second": 1.217, "step": 2 }, { "epoch": 2.0, "eval_loss": 6.888936519622803, "eval_runtime": 10.8293, "eval_samples_per_second": 923.42, "eval_steps_per_second": 1.2, "step": 4 }, { "epoch": 3.0, "eval_loss": 6.1838483810424805, "eval_runtime": 10.6887, "eval_samples_per_second": 935.569, "eval_steps_per_second": 1.216, "step": 6 }, { "epoch": 4.0, "eval_loss": 5.626400947570801, "eval_runtime": 10.9681, "eval_samples_per_second": 911.734, "eval_steps_per_second": 1.185, "step": 8 }, { "epoch": 5.0, "eval_loss": 5.153331279754639, "eval_runtime": 10.7369, "eval_samples_per_second": 931.367, "eval_steps_per_second": 1.211, "step": 10 }, { "epoch": 6.0, "eval_loss": 4.734142780303955, "eval_runtime": 11.004, "eval_samples_per_second": 908.762, "eval_steps_per_second": 1.181, "step": 12 }, { "epoch": 7.0, "eval_loss": 4.333576679229736, "eval_runtime": 10.7676, "eval_samples_per_second": 928.708, "eval_steps_per_second": 1.207, "step": 14 }, { "epoch": 8.0, "eval_loss": 3.938502788543701, "eval_runtime": 10.782, "eval_samples_per_second": 927.476, "eval_steps_per_second": 1.206, "step": 16 }, { "epoch": 9.0, "eval_loss": 3.557016134262085, "eval_runtime": 11.022, "eval_samples_per_second": 907.278, "eval_steps_per_second": 1.179, "step": 18 }, { "epoch": 10.0, "eval_loss": 3.2168831825256348, "eval_runtime": 10.7843, "eval_samples_per_second": 927.273, "eval_steps_per_second": 1.205, "step": 20 }, { "epoch": 11.0, "eval_loss": 2.9309566020965576, "eval_runtime": 10.8755, "eval_samples_per_second": 919.501, "eval_steps_per_second": 1.195, "step": 22 }, { "epoch": 12.0, "eval_loss": 2.7042219638824463, "eval_runtime": 10.7967, "eval_samples_per_second": 926.21, "eval_steps_per_second": 1.204, "step": 24 }, { "epoch": 13.0, "eval_loss": 2.5138416290283203, "eval_runtime": 10.8666, "eval_samples_per_second": 920.248, "eval_steps_per_second": 1.196, "step": 26 }, { "epoch": 14.0, "eval_loss": 2.3652985095977783, "eval_runtime": 10.7919, "eval_samples_per_second": 926.619, "eval_steps_per_second": 1.205, "step": 28 }, { "epoch": 15.0, "eval_loss": 2.248030185699463, "eval_runtime": 10.8705, "eval_samples_per_second": 919.923, "eval_steps_per_second": 1.196, "step": 30 }, { "epoch": 16.0, "eval_loss": 2.13440203666687, "eval_runtime": 10.7977, "eval_samples_per_second": 926.12, "eval_steps_per_second": 1.204, "step": 32 }, { "epoch": 17.0, "eval_loss": 2.0899171829223633, "eval_runtime": 10.7912, "eval_samples_per_second": 926.685, "eval_steps_per_second": 1.205, "step": 34 }, { "epoch": 18.0, "eval_loss": 2.100477457046509, "eval_runtime": 10.8653, "eval_samples_per_second": 920.357, "eval_steps_per_second": 1.196, "step": 36 }, { "epoch": 19.0, "eval_loss": 2.047548532485962, "eval_runtime": 10.7885, "eval_samples_per_second": 926.915, "eval_steps_per_second": 1.205, "step": 38 }, { "epoch": 20.0, "eval_loss": 1.996556043624878, "eval_runtime": 11.0201, "eval_samples_per_second": 907.429, "eval_steps_per_second": 1.18, "step": 40 }, { "epoch": 21.0, "eval_loss": 1.9695481061935425, "eval_runtime": 10.783, "eval_samples_per_second": 927.385, "eval_steps_per_second": 1.206, "step": 42 }, { "epoch": 22.0, "eval_loss": 1.9760867357254028, "eval_runtime": 10.8889, "eval_samples_per_second": 918.37, "eval_steps_per_second": 1.194, "step": 44 }, { "epoch": 23.0, "eval_loss": 1.9658013582229614, "eval_runtime": 10.7879, "eval_samples_per_second": 926.962, "eval_steps_per_second": 1.205, "step": 46 }, { "epoch": 24.0, "eval_loss": 1.9119044542312622, "eval_runtime": 10.8618, "eval_samples_per_second": 920.659, "eval_steps_per_second": 1.197, "step": 48 }, { "epoch": 25.0, "eval_loss": 1.89186429977417, "eval_runtime": 10.7933, "eval_samples_per_second": 926.503, "eval_steps_per_second": 1.204, "step": 50 }, { "epoch": 26.0, "eval_loss": 1.8932195901870728, "eval_runtime": 10.873, "eval_samples_per_second": 919.709, "eval_steps_per_second": 1.196, "step": 52 }, { "epoch": 27.0, "eval_loss": 1.9076615571975708, "eval_runtime": 10.7917, "eval_samples_per_second": 926.637, "eval_steps_per_second": 1.205, "step": 54 }, { "epoch": 28.0, "eval_loss": 1.8671808242797852, "eval_runtime": 10.7945, "eval_samples_per_second": 926.402, "eval_steps_per_second": 1.204, "step": 56 }, { "epoch": 29.0, "eval_loss": 1.8664835691452026, "eval_runtime": 10.8739, "eval_samples_per_second": 919.632, "eval_steps_per_second": 1.196, "step": 58 }, { "epoch": 30.0, "eval_loss": 1.8679856061935425, "eval_runtime": 10.7966, "eval_samples_per_second": 926.219, "eval_steps_per_second": 1.204, "step": 60 }, { "epoch": 31.0, "eval_loss": 1.858332633972168, "eval_runtime": 10.8742, "eval_samples_per_second": 919.609, "eval_steps_per_second": 1.195, "step": 62 }, { "epoch": 32.0, "eval_loss": 1.8586550951004028, "eval_runtime": 10.7775, "eval_samples_per_second": 927.859, "eval_steps_per_second": 1.206, "step": 64 }, { "epoch": 33.0, "eval_loss": 1.836769700050354, "eval_runtime": 10.8615, "eval_samples_per_second": 920.686, "eval_steps_per_second": 1.197, "step": 66 }, { "epoch": 34.0, "eval_loss": 1.8279472589492798, "eval_runtime": 10.7787, "eval_samples_per_second": 927.754, "eval_steps_per_second": 1.206, "step": 68 }, { "epoch": 35.0, "eval_loss": 1.817927360534668, "eval_runtime": 10.7813, "eval_samples_per_second": 927.534, "eval_steps_per_second": 1.206, "step": 70 }, { "epoch": 36.0, "eval_loss": 1.7950810194015503, "eval_runtime": 11.0203, "eval_samples_per_second": 907.419, "eval_steps_per_second": 1.18, "step": 72 }, { "epoch": 37.0, "eval_loss": 1.7667052745819092, "eval_runtime": 10.7915, "eval_samples_per_second": 926.655, "eval_steps_per_second": 1.205, "step": 74 }, { "epoch": 38.0, "eval_loss": 1.7852015495300293, "eval_runtime": 10.8703, "eval_samples_per_second": 919.941, "eval_steps_per_second": 1.196, "step": 76 }, { "epoch": 39.0, "eval_loss": 1.7741429805755615, "eval_runtime": 10.7837, "eval_samples_per_second": 927.326, "eval_steps_per_second": 1.206, "step": 78 }, { "epoch": 40.0, "eval_loss": 1.718443512916565, "eval_runtime": 10.8729, "eval_samples_per_second": 919.72, "eval_steps_per_second": 1.196, "step": 80 }, { "epoch": 41.0, "eval_loss": 1.6899281740188599, "eval_runtime": 10.789, "eval_samples_per_second": 926.869, "eval_steps_per_second": 1.205, "step": 82 }, { "epoch": 42.0, "eval_loss": 1.6984148025512695, "eval_runtime": 10.8669, "eval_samples_per_second": 920.229, "eval_steps_per_second": 1.196, "step": 84 }, { "epoch": 43.0, "eval_loss": 1.68986177444458, "eval_runtime": 10.7878, "eval_samples_per_second": 926.976, "eval_steps_per_second": 1.205, "step": 86 }, { "epoch": 44.0, "eval_loss": 1.643083930015564, "eval_runtime": 10.7947, "eval_samples_per_second": 926.384, "eval_steps_per_second": 1.204, "step": 88 }, { "epoch": 45.0, "eval_loss": 1.6373401880264282, "eval_runtime": 10.8675, "eval_samples_per_second": 920.171, "eval_steps_per_second": 1.196, "step": 90 }, { "epoch": 46.0, "eval_loss": 1.6289523839950562, "eval_runtime": 10.7853, "eval_samples_per_second": 927.192, "eval_steps_per_second": 1.205, "step": 92 }, { "epoch": 47.0, "eval_loss": 1.6165605783462524, "eval_runtime": 11.0285, "eval_samples_per_second": 906.741, "eval_steps_per_second": 1.179, "step": 94 }, { "epoch": 48.0, "eval_loss": 1.6076592206954956, "eval_runtime": 10.7945, "eval_samples_per_second": 926.401, "eval_steps_per_second": 1.204, "step": 96 }, { "epoch": 49.0, "eval_loss": 1.604373574256897, "eval_runtime": 10.8717, "eval_samples_per_second": 919.822, "eval_steps_per_second": 1.196, "step": 98 }, { "epoch": 50.0, "eval_loss": 1.616941213607788, "eval_runtime": 10.793, "eval_samples_per_second": 926.53, "eval_steps_per_second": 1.204, "step": 100 }, { "epoch": 51.0, "eval_loss": 1.6135770082473755, "eval_runtime": 11.0422, "eval_samples_per_second": 905.614, "eval_steps_per_second": 1.177, "step": 102 }, { "epoch": 52.0, "eval_loss": 1.584197759628296, "eval_runtime": 10.7862, "eval_samples_per_second": 927.109, "eval_steps_per_second": 1.205, "step": 104 }, { "epoch": 53.0, "eval_loss": 1.5821086168289185, "eval_runtime": 11.0219, "eval_samples_per_second": 907.286, "eval_steps_per_second": 1.179, "step": 106 }, { "epoch": 54.0, "eval_loss": 1.592121958732605, "eval_runtime": 10.7783, "eval_samples_per_second": 927.787, "eval_steps_per_second": 1.206, "step": 108 }, { "epoch": 55.0, "eval_loss": 1.598443865776062, "eval_runtime": 11.0215, "eval_samples_per_second": 907.318, "eval_steps_per_second": 1.18, "step": 110 }, { "epoch": 56.0, "eval_loss": 1.562904715538025, "eval_runtime": 10.8689, "eval_samples_per_second": 920.053, "eval_steps_per_second": 1.196, "step": 112 }, { "epoch": 57.0, "eval_loss": 1.556736707687378, "eval_runtime": 10.7877, "eval_samples_per_second": 926.98, "eval_steps_per_second": 1.205, "step": 114 }, { "epoch": 58.0, "eval_loss": 1.554081916809082, "eval_runtime": 10.8724, "eval_samples_per_second": 919.757, "eval_steps_per_second": 1.196, "step": 116 }, { "epoch": 59.0, "eval_loss": 1.5780812501907349, "eval_runtime": 10.7882, "eval_samples_per_second": 926.94, "eval_steps_per_second": 1.205, "step": 118 }, { "epoch": 60.0, "eval_loss": 1.5569387674331665, "eval_runtime": 10.878, "eval_samples_per_second": 919.289, "eval_steps_per_second": 1.195, "step": 120 }, { "epoch": 61.0, "eval_loss": 1.5472592115402222, "eval_runtime": 10.797, "eval_samples_per_second": 926.18, "eval_steps_per_second": 1.204, "step": 122 }, { "epoch": 62.0, "eval_loss": 1.5447337627410889, "eval_runtime": 10.7871, "eval_samples_per_second": 927.031, "eval_steps_per_second": 1.205, "step": 124 }, { "epoch": 63.0, "eval_loss": 1.5714285373687744, "eval_runtime": 11.0266, "eval_samples_per_second": 906.896, "eval_steps_per_second": 1.179, "step": 126 }, { "epoch": 64.0, "eval_loss": 1.572940468788147, "eval_runtime": 10.7931, "eval_samples_per_second": 926.518, "eval_steps_per_second": 1.204, "step": 128 }, { "epoch": 65.0, "eval_loss": 1.5434839725494385, "eval_runtime": 10.8657, "eval_samples_per_second": 920.328, "eval_steps_per_second": 1.196, "step": 130 }, { "epoch": 66.0, "eval_loss": 1.5406599044799805, "eval_runtime": 10.7896, "eval_samples_per_second": 926.816, "eval_steps_per_second": 1.205, "step": 132 }, { "epoch": 67.0, "eval_loss": 1.5430865287780762, "eval_runtime": 10.868, "eval_samples_per_second": 920.137, "eval_steps_per_second": 1.196, "step": 134 }, { "epoch": 68.0, "eval_loss": 1.5774503946304321, "eval_runtime": 10.7888, "eval_samples_per_second": 926.889, "eval_steps_per_second": 1.205, "step": 136 }, { "epoch": 69.0, "eval_loss": 1.5680021047592163, "eval_runtime": 10.8677, "eval_samples_per_second": 920.161, "eval_steps_per_second": 1.196, "step": 138 }, { "epoch": 70.0, "eval_loss": 1.5386120080947876, "eval_runtime": 10.7802, "eval_samples_per_second": 927.629, "eval_steps_per_second": 1.206, "step": 140 }, { "epoch": 71.0, "eval_loss": 1.537217378616333, "eval_runtime": 10.7876, "eval_samples_per_second": 926.994, "eval_steps_per_second": 1.205, "step": 142 }, { "epoch": 72.0, "eval_loss": 1.5415935516357422, "eval_runtime": 10.8784, "eval_samples_per_second": 919.255, "eval_steps_per_second": 1.195, "step": 144 }, { "epoch": 73.0, "eval_loss": 1.5851819515228271, "eval_runtime": 10.7879, "eval_samples_per_second": 926.962, "eval_steps_per_second": 1.205, "step": 146 }, { "epoch": 74.0, "eval_loss": 1.5615135431289673, "eval_runtime": 11.0229, "eval_samples_per_second": 907.205, "eval_steps_per_second": 1.179, "step": 148 }, { "epoch": 75.0, "eval_loss": 1.5356107950210571, "eval_runtime": 10.7892, "eval_samples_per_second": 926.852, "eval_steps_per_second": 1.205, "step": 150 }, { "epoch": 76.0, "eval_loss": 1.5345250368118286, "eval_runtime": 10.8628, "eval_samples_per_second": 920.574, "eval_steps_per_second": 1.197, "step": 152 }, { "epoch": 77.0, "eval_loss": 1.5348494052886963, "eval_runtime": 10.7982, "eval_samples_per_second": 926.08, "eval_steps_per_second": 1.204, "step": 154 }, { "epoch": 78.0, "eval_loss": 1.5350173711776733, "eval_runtime": 11.0234, "eval_samples_per_second": 907.165, "eval_steps_per_second": 1.179, "step": 156 }, { "epoch": 79.0, "eval_loss": 1.5344058275222778, "eval_runtime": 10.7938, "eval_samples_per_second": 926.459, "eval_steps_per_second": 1.204, "step": 158 }, { "epoch": 80.0, "eval_loss": 1.5319613218307495, "eval_runtime": 11.0103, "eval_samples_per_second": 908.24, "eval_steps_per_second": 1.181, "step": 160 }, { "epoch": 81.0, "eval_loss": 1.5311990976333618, "eval_runtime": 10.7993, "eval_samples_per_second": 925.99, "eval_steps_per_second": 1.204, "step": 162 }, { "epoch": 82.0, "eval_loss": 1.5308246612548828, "eval_runtime": 11.0279, "eval_samples_per_second": 906.787, "eval_steps_per_second": 1.179, "step": 164 }, { "epoch": 83.0, "eval_loss": 1.530852198600769, "eval_runtime": 10.8812, "eval_samples_per_second": 919.019, "eval_steps_per_second": 1.195, "step": 166 }, { "epoch": 84.0, "eval_loss": 1.528510332107544, "eval_runtime": 10.7908, "eval_samples_per_second": 926.72, "eval_steps_per_second": 1.205, "step": 168 }, { "epoch": 85.0, "eval_loss": 1.5265289545059204, "eval_runtime": 10.8831, "eval_samples_per_second": 918.858, "eval_steps_per_second": 1.195, "step": 170 }, { "epoch": 86.0, "eval_loss": 1.525810956954956, "eval_runtime": 10.7922, "eval_samples_per_second": 926.595, "eval_steps_per_second": 1.205, "step": 172 }, { "epoch": 87.0, "eval_loss": 1.5260074138641357, "eval_runtime": 10.8658, "eval_samples_per_second": 920.317, "eval_steps_per_second": 1.196, "step": 174 }, { "epoch": 88.0, "eval_loss": 1.523621678352356, "eval_runtime": 10.8062, "eval_samples_per_second": 925.398, "eval_steps_per_second": 1.203, "step": 176 }, { "epoch": 89.0, "eval_loss": 1.522126317024231, "eval_runtime": 10.7844, "eval_samples_per_second": 927.263, "eval_steps_per_second": 1.205, "step": 178 }, { "epoch": 90.0, "eval_loss": 1.5219790935516357, "eval_runtime": 10.8694, "eval_samples_per_second": 920.018, "eval_steps_per_second": 1.196, "step": 180 }, { "epoch": 91.0, "eval_loss": 1.5207847356796265, "eval_runtime": 10.7979, "eval_samples_per_second": 926.103, "eval_steps_per_second": 1.204, "step": 182 }, { "epoch": 92.0, "eval_loss": 1.517883062362671, "eval_runtime": 10.8783, "eval_samples_per_second": 919.261, "eval_steps_per_second": 1.195, "step": 184 }, { "epoch": 93.0, "eval_loss": 1.515496015548706, "eval_runtime": 10.783, "eval_samples_per_second": 927.387, "eval_steps_per_second": 1.206, "step": 186 }, { "epoch": 94.0, "eval_loss": 1.5140419006347656, "eval_runtime": 11.0298, "eval_samples_per_second": 906.635, "eval_steps_per_second": 1.179, "step": 188 }, { "epoch": 95.0, "eval_loss": 1.5137407779693604, "eval_runtime": 10.8014, "eval_samples_per_second": 925.809, "eval_steps_per_second": 1.204, "step": 190 }, { "epoch": 96.0, "eval_loss": 1.5128830671310425, "eval_runtime": 10.8766, "eval_samples_per_second": 919.402, "eval_steps_per_second": 1.195, "step": 192 }, { "epoch": 97.0, "eval_loss": 1.5099443197250366, "eval_runtime": 10.809, "eval_samples_per_second": 925.158, "eval_steps_per_second": 1.203, "step": 194 }, { "epoch": 98.0, "eval_loss": 1.506999135017395, "eval_runtime": 10.9785, "eval_samples_per_second": 910.872, "eval_steps_per_second": 1.184, "step": 196 }, { "epoch": 99.0, "eval_loss": 1.5044018030166626, "eval_runtime": 10.883, "eval_samples_per_second": 918.868, "eval_steps_per_second": 1.195, "step": 198 }, { "epoch": 100.0, "eval_loss": 1.5024514198303223, "eval_runtime": 10.7906, "eval_samples_per_second": 926.73, "eval_steps_per_second": 1.205, "step": 200 }, { "epoch": 101.0, "eval_loss": 1.501634120941162, "eval_runtime": 10.7793, "eval_samples_per_second": 927.703, "eval_steps_per_second": 1.206, "step": 202 }, { "epoch": 102.0, "eval_loss": 1.5009431838989258, "eval_runtime": 10.7085, "eval_samples_per_second": 933.839, "eval_steps_per_second": 1.214, "step": 204 }, { "epoch": 103.0, "eval_loss": 1.499057650566101, "eval_runtime": 10.6874, "eval_samples_per_second": 935.68, "eval_steps_per_second": 1.216, "step": 206 }, { "epoch": 104.0, "eval_loss": 1.4963877201080322, "eval_runtime": 10.6752, "eval_samples_per_second": 936.753, "eval_steps_per_second": 1.218, "step": 208 }, { "epoch": 105.0, "eval_loss": 1.4944827556610107, "eval_runtime": 10.6755, "eval_samples_per_second": 936.725, "eval_steps_per_second": 1.218, "step": 210 }, { "epoch": 106.0, "eval_loss": 1.4929250478744507, "eval_runtime": 10.682, "eval_samples_per_second": 936.158, "eval_steps_per_second": 1.217, "step": 212 }, { "epoch": 107.0, "eval_loss": 1.4915369749069214, "eval_runtime": 10.6838, "eval_samples_per_second": 935.995, "eval_steps_per_second": 1.217, "step": 214 }, { "epoch": 108.0, "eval_loss": 1.49076509475708, "eval_runtime": 10.6769, "eval_samples_per_second": 936.603, "eval_steps_per_second": 1.218, "step": 216 }, { "epoch": 109.0, "eval_loss": 1.4891281127929688, "eval_runtime": 10.6778, "eval_samples_per_second": 936.521, "eval_steps_per_second": 1.217, "step": 218 }, { "epoch": 110.0, "eval_loss": 1.486264944076538, "eval_runtime": 10.67, "eval_samples_per_second": 937.205, "eval_steps_per_second": 1.218, "step": 220 }, { "epoch": 111.0, "eval_loss": 1.481858730316162, "eval_runtime": 10.6684, "eval_samples_per_second": 937.351, "eval_steps_per_second": 1.219, "step": 222 }, { "epoch": 112.0, "eval_loss": 1.4790288209915161, "eval_runtime": 10.6764, "eval_samples_per_second": 936.646, "eval_steps_per_second": 1.218, "step": 224 }, { "epoch": 113.0, "eval_loss": 1.477858066558838, "eval_runtime": 10.672, "eval_samples_per_second": 937.028, "eval_steps_per_second": 1.218, "step": 226 }, { "epoch": 114.0, "eval_loss": 1.477932095527649, "eval_runtime": 10.6779, "eval_samples_per_second": 936.51, "eval_steps_per_second": 1.217, "step": 228 }, { "epoch": 115.0, "eval_loss": 1.4761333465576172, "eval_runtime": 10.6829, "eval_samples_per_second": 936.078, "eval_steps_per_second": 1.217, "step": 230 }, { "epoch": 116.0, "eval_loss": 1.472651481628418, "eval_runtime": 10.6687, "eval_samples_per_second": 937.324, "eval_steps_per_second": 1.219, "step": 232 }, { "epoch": 117.0, "eval_loss": 1.4697270393371582, "eval_runtime": 10.6649, "eval_samples_per_second": 937.657, "eval_steps_per_second": 1.219, "step": 234 }, { "epoch": 118.0, "eval_loss": 1.4677116870880127, "eval_runtime": 10.6723, "eval_samples_per_second": 937.001, "eval_steps_per_second": 1.218, "step": 236 }, { "epoch": 119.0, "eval_loss": 1.4658899307250977, "eval_runtime": 10.6745, "eval_samples_per_second": 936.811, "eval_steps_per_second": 1.218, "step": 238 }, { "epoch": 120.0, "eval_loss": 1.4640589952468872, "eval_runtime": 10.6687, "eval_samples_per_second": 937.323, "eval_steps_per_second": 1.219, "step": 240 }, { "epoch": 121.0, "eval_loss": 1.4623686075210571, "eval_runtime": 10.6735, "eval_samples_per_second": 936.896, "eval_steps_per_second": 1.218, "step": 242 }, { "epoch": 122.0, "eval_loss": 1.4619512557983398, "eval_runtime": 10.6694, "eval_samples_per_second": 937.256, "eval_steps_per_second": 1.218, "step": 244 }, { "epoch": 123.0, "eval_loss": 1.4633115530014038, "eval_runtime": 10.6685, "eval_samples_per_second": 937.343, "eval_steps_per_second": 1.219, "step": 246 }, { "epoch": 124.0, "eval_loss": 1.464633584022522, "eval_runtime": 10.6778, "eval_samples_per_second": 936.522, "eval_steps_per_second": 1.217, "step": 248 }, { "epoch": 125.0, "eval_loss": 1.464717984199524, "eval_runtime": 10.7082, "eval_samples_per_second": 933.861, "eval_steps_per_second": 1.214, "step": 250 }, { "epoch": 126.0, "eval_loss": 1.4619494676589966, "eval_runtime": 10.6743, "eval_samples_per_second": 936.83, "eval_steps_per_second": 1.218, "step": 252 }, { "epoch": 127.0, "eval_loss": 1.4600682258605957, "eval_runtime": 10.6839, "eval_samples_per_second": 935.989, "eval_steps_per_second": 1.217, "step": 254 }, { "epoch": 128.0, "eval_loss": 1.460466980934143, "eval_runtime": 10.6674, "eval_samples_per_second": 937.434, "eval_steps_per_second": 1.219, "step": 256 }, { "epoch": 129.0, "eval_loss": 1.4635497331619263, "eval_runtime": 10.685, "eval_samples_per_second": 935.888, "eval_steps_per_second": 1.217, "step": 258 }, { "epoch": 130.0, "eval_loss": 1.4629032611846924, "eval_runtime": 10.6911, "eval_samples_per_second": 935.356, "eval_steps_per_second": 1.216, "step": 260 }, { "epoch": 131.0, "eval_loss": 1.4596558809280396, "eval_runtime": 10.6919, "eval_samples_per_second": 935.291, "eval_steps_per_second": 1.216, "step": 262 }, { "epoch": 132.0, "eval_loss": 1.4560246467590332, "eval_runtime": 10.6769, "eval_samples_per_second": 936.603, "eval_steps_per_second": 1.218, "step": 264 }, { "epoch": 133.0, "eval_loss": 1.453616738319397, "eval_runtime": 10.6767, "eval_samples_per_second": 936.622, "eval_steps_per_second": 1.218, "step": 266 }, { "epoch": 134.0, "eval_loss": 1.4538458585739136, "eval_runtime": 10.6773, "eval_samples_per_second": 936.563, "eval_steps_per_second": 1.218, "step": 268 }, { "epoch": 135.0, "eval_loss": 1.4554544687271118, "eval_runtime": 10.6749, "eval_samples_per_second": 936.779, "eval_steps_per_second": 1.218, "step": 270 }, { "epoch": 136.0, "eval_loss": 1.458116054534912, "eval_runtime": 10.6729, "eval_samples_per_second": 936.95, "eval_steps_per_second": 1.218, "step": 272 }, { "epoch": 137.0, "eval_loss": 1.458404541015625, "eval_runtime": 10.6614, "eval_samples_per_second": 937.965, "eval_steps_per_second": 1.219, "step": 274 }, { "epoch": 138.0, "eval_loss": 1.4573506116867065, "eval_runtime": 10.6734, "eval_samples_per_second": 936.913, "eval_steps_per_second": 1.218, "step": 276 }, { "epoch": 139.0, "eval_loss": 1.4543631076812744, "eval_runtime": 10.6672, "eval_samples_per_second": 937.452, "eval_steps_per_second": 1.219, "step": 278 }, { "epoch": 140.0, "eval_loss": 1.4508144855499268, "eval_runtime": 10.6794, "eval_samples_per_second": 936.382, "eval_steps_per_second": 1.217, "step": 280 }, { "epoch": 141.0, "eval_loss": 1.447538137435913, "eval_runtime": 10.6758, "eval_samples_per_second": 936.702, "eval_steps_per_second": 1.218, "step": 282 }, { "epoch": 142.0, "eval_loss": 1.4447238445281982, "eval_runtime": 10.6659, "eval_samples_per_second": 937.568, "eval_steps_per_second": 1.219, "step": 284 }, { "epoch": 143.0, "eval_loss": 1.4447556734085083, "eval_runtime": 10.6716, "eval_samples_per_second": 937.067, "eval_steps_per_second": 1.218, "step": 286 }, { "epoch": 144.0, "eval_loss": 1.4442577362060547, "eval_runtime": 10.676, "eval_samples_per_second": 936.684, "eval_steps_per_second": 1.218, "step": 288 }, { "epoch": 145.0, "eval_loss": 1.4424697160720825, "eval_runtime": 10.668, "eval_samples_per_second": 937.381, "eval_steps_per_second": 1.219, "step": 290 }, { "epoch": 146.0, "eval_loss": 1.4408490657806396, "eval_runtime": 10.6762, "eval_samples_per_second": 936.664, "eval_steps_per_second": 1.218, "step": 292 }, { "epoch": 147.0, "eval_loss": 1.4410812854766846, "eval_runtime": 10.6701, "eval_samples_per_second": 937.198, "eval_steps_per_second": 1.218, "step": 294 }, { "epoch": 148.0, "eval_loss": 1.4422898292541504, "eval_runtime": 10.7048, "eval_samples_per_second": 934.163, "eval_steps_per_second": 1.214, "step": 296 }, { "epoch": 149.0, "eval_loss": 1.4430429935455322, "eval_runtime": 10.7029, "eval_samples_per_second": 934.33, "eval_steps_per_second": 1.215, "step": 298 }, { "epoch": 150.0, "eval_loss": 1.4431097507476807, "eval_runtime": 10.701, "eval_samples_per_second": 934.49, "eval_steps_per_second": 1.215, "step": 300 }, { "epoch": 151.0, "eval_loss": 1.4416182041168213, "eval_runtime": 10.7066, "eval_samples_per_second": 934.002, "eval_steps_per_second": 1.214, "step": 302 }, { "epoch": 152.0, "eval_loss": 1.4399303197860718, "eval_runtime": 10.6813, "eval_samples_per_second": 936.212, "eval_steps_per_second": 1.217, "step": 304 }, { "epoch": 153.0, "eval_loss": 1.437761664390564, "eval_runtime": 10.6766, "eval_samples_per_second": 936.628, "eval_steps_per_second": 1.218, "step": 306 }, { "epoch": 154.0, "eval_loss": 1.436241865158081, "eval_runtime": 10.6783, "eval_samples_per_second": 936.479, "eval_steps_per_second": 1.217, "step": 308 }, { "epoch": 155.0, "eval_loss": 1.4360020160675049, "eval_runtime": 10.7009, "eval_samples_per_second": 934.499, "eval_steps_per_second": 1.215, "step": 310 }, { "epoch": 156.0, "eval_loss": 1.4338910579681396, "eval_runtime": 10.6868, "eval_samples_per_second": 935.733, "eval_steps_per_second": 1.216, "step": 312 }, { "epoch": 157.0, "eval_loss": 1.4325634241104126, "eval_runtime": 10.6777, "eval_samples_per_second": 936.527, "eval_steps_per_second": 1.217, "step": 314 }, { "epoch": 158.0, "eval_loss": 1.4329997301101685, "eval_runtime": 10.6692, "eval_samples_per_second": 937.275, "eval_steps_per_second": 1.218, "step": 316 }, { "epoch": 159.0, "eval_loss": 1.4324475526809692, "eval_runtime": 10.6817, "eval_samples_per_second": 936.178, "eval_steps_per_second": 1.217, "step": 318 }, { "epoch": 160.0, "eval_loss": 1.4320900440216064, "eval_runtime": 10.6759, "eval_samples_per_second": 936.685, "eval_steps_per_second": 1.218, "step": 320 }, { "epoch": 161.0, "eval_loss": 1.4325189590454102, "eval_runtime": 10.6696, "eval_samples_per_second": 937.244, "eval_steps_per_second": 1.218, "step": 322 }, { "epoch": 162.0, "eval_loss": 1.4319082498550415, "eval_runtime": 10.6597, "eval_samples_per_second": 938.116, "eval_steps_per_second": 1.22, "step": 324 }, { "epoch": 163.0, "eval_loss": 1.432405710220337, "eval_runtime": 10.6632, "eval_samples_per_second": 937.807, "eval_steps_per_second": 1.219, "step": 326 }, { "epoch": 164.0, "eval_loss": 1.4340990781784058, "eval_runtime": 10.6793, "eval_samples_per_second": 936.393, "eval_steps_per_second": 1.217, "step": 328 }, { "epoch": 165.0, "eval_loss": 1.4348700046539307, "eval_runtime": 10.6745, "eval_samples_per_second": 936.81, "eval_steps_per_second": 1.218, "step": 330 }, { "epoch": 166.0, "eval_loss": 1.4340572357177734, "eval_runtime": 10.6758, "eval_samples_per_second": 936.694, "eval_steps_per_second": 1.218, "step": 332 }, { "epoch": 167.0, "eval_loss": 1.432230830192566, "eval_runtime": 10.6679, "eval_samples_per_second": 937.39, "eval_steps_per_second": 1.219, "step": 334 }, { "epoch": 168.0, "eval_loss": 1.4290224313735962, "eval_runtime": 10.6717, "eval_samples_per_second": 937.056, "eval_steps_per_second": 1.218, "step": 336 }, { "epoch": 169.0, "eval_loss": 1.4260591268539429, "eval_runtime": 10.6646, "eval_samples_per_second": 937.683, "eval_steps_per_second": 1.219, "step": 338 }, { "epoch": 170.0, "eval_loss": 1.4241833686828613, "eval_runtime": 10.6743, "eval_samples_per_second": 936.827, "eval_steps_per_second": 1.218, "step": 340 }, { "epoch": 171.0, "eval_loss": 1.4232484102249146, "eval_runtime": 10.6668, "eval_samples_per_second": 937.489, "eval_steps_per_second": 1.219, "step": 342 }, { "epoch": 172.0, "eval_loss": 1.4233402013778687, "eval_runtime": 10.674, "eval_samples_per_second": 936.859, "eval_steps_per_second": 1.218, "step": 344 }, { "epoch": 173.0, "eval_loss": 1.4248952865600586, "eval_runtime": 10.6819, "eval_samples_per_second": 936.165, "eval_steps_per_second": 1.217, "step": 346 }, { "epoch": 174.0, "eval_loss": 1.4270906448364258, "eval_runtime": 10.6759, "eval_samples_per_second": 936.692, "eval_steps_per_second": 1.218, "step": 348 }, { "epoch": 175.0, "eval_loss": 1.4285681247711182, "eval_runtime": 10.6797, "eval_samples_per_second": 936.356, "eval_steps_per_second": 1.217, "step": 350 }, { "epoch": 176.0, "eval_loss": 1.4294575452804565, "eval_runtime": 10.6749, "eval_samples_per_second": 936.78, "eval_steps_per_second": 1.218, "step": 352 }, { "epoch": 177.0, "eval_loss": 1.428400993347168, "eval_runtime": 10.6621, "eval_samples_per_second": 937.902, "eval_steps_per_second": 1.219, "step": 354 }, { "epoch": 178.0, "eval_loss": 1.4263620376586914, "eval_runtime": 10.6806, "eval_samples_per_second": 936.281, "eval_steps_per_second": 1.217, "step": 356 }, { "epoch": 179.0, "eval_loss": 1.4240350723266602, "eval_runtime": 10.669, "eval_samples_per_second": 937.296, "eval_steps_per_second": 1.218, "step": 358 }, { "epoch": 180.0, "eval_loss": 1.4221690893173218, "eval_runtime": 10.6788, "eval_samples_per_second": 936.431, "eval_steps_per_second": 1.217, "step": 360 }, { "epoch": 181.0, "eval_loss": 1.4208089113235474, "eval_runtime": 10.6801, "eval_samples_per_second": 936.319, "eval_steps_per_second": 1.217, "step": 362 } ], "logging_steps": 500, "max_steps": 400, "num_train_epochs": 200, "save_steps": 500, "total_flos": 5196677296128000.0, "trial_name": null, "trial_params": null }