{ "best_metric": 0.5188751220703125, "best_model_checkpoint": "AlexWang99/byt5_add_2k/checkpoint-450", "epoch": 150.0, "eval_steps": 500, "global_step": 450, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_loss": 7.222967147827148, "eval_runtime": 10.6822, "eval_samples_per_second": 936.137, "eval_steps_per_second": 1.217, "step": 3 }, { "epoch": 2.0, "eval_loss": 5.985061168670654, "eval_runtime": 10.9203, "eval_samples_per_second": 915.722, "eval_steps_per_second": 1.19, "step": 6 }, { "epoch": 3.0, "eval_loss": 5.1600236892700195, "eval_runtime": 10.7228, "eval_samples_per_second": 932.591, "eval_steps_per_second": 1.212, "step": 9 }, { "epoch": 4.0, "eval_loss": 4.493813514709473, "eval_runtime": 11.0064, "eval_samples_per_second": 908.565, "eval_steps_per_second": 1.181, "step": 12 }, { "epoch": 5.0, "eval_loss": 3.8893628120422363, "eval_runtime": 10.8904, "eval_samples_per_second": 918.238, "eval_steps_per_second": 1.194, "step": 15 }, { "epoch": 6.0, "eval_loss": 3.341691493988037, "eval_runtime": 10.8018, "eval_samples_per_second": 925.772, "eval_steps_per_second": 1.204, "step": 18 }, { "epoch": 7.0, "eval_loss": 2.9026920795440674, "eval_runtime": 10.8887, "eval_samples_per_second": 918.384, "eval_steps_per_second": 1.194, "step": 21 }, { "epoch": 8.0, "eval_loss": 2.606121063232422, "eval_runtime": 10.8106, "eval_samples_per_second": 925.018, "eval_steps_per_second": 1.203, "step": 24 }, { "epoch": 9.0, "eval_loss": 2.402636766433716, "eval_runtime": 11.0832, "eval_samples_per_second": 902.265, "eval_steps_per_second": 1.173, "step": 27 }, { "epoch": 10.0, "eval_loss": 2.1915464401245117, "eval_runtime": 10.8272, "eval_samples_per_second": 923.604, "eval_steps_per_second": 1.201, "step": 30 }, { "epoch": 11.0, "eval_loss": 2.1069843769073486, "eval_runtime": 10.8305, "eval_samples_per_second": 923.32, "eval_steps_per_second": 1.2, "step": 33 }, { "epoch": 12.0, "eval_loss": 2.0630228519439697, "eval_runtime": 10.9163, "eval_samples_per_second": 916.063, "eval_steps_per_second": 1.191, "step": 36 }, { "epoch": 13.0, "eval_loss": 1.9474797248840332, "eval_runtime": 10.8302, "eval_samples_per_second": 923.345, "eval_steps_per_second": 1.2, "step": 39 }, { "epoch": 14.0, "eval_loss": 1.9065855741500854, "eval_runtime": 11.0817, "eval_samples_per_second": 902.388, "eval_steps_per_second": 1.173, "step": 42 }, { "epoch": 15.0, "eval_loss": 1.8849406242370605, "eval_runtime": 10.8362, "eval_samples_per_second": 922.835, "eval_steps_per_second": 1.2, "step": 45 }, { "epoch": 16.0, "eval_loss": 1.7817472219467163, "eval_runtime": 10.8435, "eval_samples_per_second": 922.207, "eval_steps_per_second": 1.199, "step": 48 }, { "epoch": 17.0, "eval_loss": 1.7877730131149292, "eval_runtime": 10.9315, "eval_samples_per_second": 914.788, "eval_steps_per_second": 1.189, "step": 51 }, { "epoch": 18.0, "eval_loss": 1.7331796884536743, "eval_runtime": 10.8485, "eval_samples_per_second": 921.788, "eval_steps_per_second": 1.198, "step": 54 }, { "epoch": 19.0, "eval_loss": 1.692647099494934, "eval_runtime": 11.0859, "eval_samples_per_second": 902.043, "eval_steps_per_second": 1.173, "step": 57 }, { "epoch": 20.0, "eval_loss": 1.6727197170257568, "eval_runtime": 10.8467, "eval_samples_per_second": 921.936, "eval_steps_per_second": 1.199, "step": 60 }, { "epoch": 21.0, "eval_loss": 1.66093909740448, "eval_runtime": 10.8432, "eval_samples_per_second": 922.235, "eval_steps_per_second": 1.199, "step": 63 }, { "epoch": 22.0, "eval_loss": 1.6616569757461548, "eval_runtime": 10.9265, "eval_samples_per_second": 915.204, "eval_steps_per_second": 1.19, "step": 66 }, { "epoch": 23.0, "eval_loss": 1.6537153720855713, "eval_runtime": 10.8532, "eval_samples_per_second": 921.386, "eval_steps_per_second": 1.198, "step": 69 }, { "epoch": 24.0, "eval_loss": 1.645085334777832, "eval_runtime": 11.0411, "eval_samples_per_second": 905.709, "eval_steps_per_second": 1.177, "step": 72 }, { "epoch": 25.0, "eval_loss": 1.6413605213165283, "eval_runtime": 10.7889, "eval_samples_per_second": 926.88, "eval_steps_per_second": 1.205, "step": 75 }, { "epoch": 26.0, "eval_loss": 1.635939598083496, "eval_runtime": 10.8026, "eval_samples_per_second": 925.7, "eval_steps_per_second": 1.203, "step": 78 }, { "epoch": 27.0, "eval_loss": 1.6321375370025635, "eval_runtime": 10.9127, "eval_samples_per_second": 916.367, "eval_steps_per_second": 1.191, "step": 81 }, { "epoch": 28.0, "eval_loss": 1.6276355981826782, "eval_runtime": 10.8251, "eval_samples_per_second": 923.783, "eval_steps_per_second": 1.201, "step": 84 }, { "epoch": 29.0, "eval_loss": 1.6232202053070068, "eval_runtime": 11.0643, "eval_samples_per_second": 903.805, "eval_steps_per_second": 1.175, "step": 87 }, { "epoch": 30.0, "eval_loss": 1.618307113647461, "eval_runtime": 10.8447, "eval_samples_per_second": 922.106, "eval_steps_per_second": 1.199, "step": 90 }, { "epoch": 31.0, "eval_loss": 1.612666368484497, "eval_runtime": 10.8371, "eval_samples_per_second": 922.759, "eval_steps_per_second": 1.2, "step": 93 }, { "epoch": 32.0, "eval_loss": 1.6067509651184082, "eval_runtime": 10.9144, "eval_samples_per_second": 916.219, "eval_steps_per_second": 1.191, "step": 96 }, { "epoch": 33.0, "eval_loss": 1.5994765758514404, "eval_runtime": 10.8587, "eval_samples_per_second": 920.924, "eval_steps_per_second": 1.197, "step": 99 }, { "epoch": 34.0, "eval_loss": 1.590742588043213, "eval_runtime": 11.0839, "eval_samples_per_second": 902.211, "eval_steps_per_second": 1.173, "step": 102 }, { "epoch": 35.0, "eval_loss": 1.5813907384872437, "eval_runtime": 10.8372, "eval_samples_per_second": 922.744, "eval_steps_per_second": 1.2, "step": 105 }, { "epoch": 36.0, "eval_loss": 1.570920467376709, "eval_runtime": 10.8433, "eval_samples_per_second": 922.23, "eval_steps_per_second": 1.199, "step": 108 }, { "epoch": 37.0, "eval_loss": 1.5595602989196777, "eval_runtime": 10.9304, "eval_samples_per_second": 914.879, "eval_steps_per_second": 1.189, "step": 111 }, { "epoch": 38.0, "eval_loss": 1.5501924753189087, "eval_runtime": 10.8429, "eval_samples_per_second": 922.259, "eval_steps_per_second": 1.199, "step": 114 }, { "epoch": 39.0, "eval_loss": 1.5431841611862183, "eval_runtime": 11.1003, "eval_samples_per_second": 900.873, "eval_steps_per_second": 1.171, "step": 117 }, { "epoch": 40.0, "eval_loss": 1.5323028564453125, "eval_runtime": 10.8468, "eval_samples_per_second": 921.928, "eval_steps_per_second": 1.199, "step": 120 }, { "epoch": 41.0, "eval_loss": 1.5221501588821411, "eval_runtime": 10.8395, "eval_samples_per_second": 922.551, "eval_steps_per_second": 1.199, "step": 123 }, { "epoch": 42.0, "eval_loss": 1.5130882263183594, "eval_runtime": 10.9396, "eval_samples_per_second": 914.108, "eval_steps_per_second": 1.188, "step": 126 }, { "epoch": 43.0, "eval_loss": 1.5008268356323242, "eval_runtime": 10.8617, "eval_samples_per_second": 920.668, "eval_steps_per_second": 1.197, "step": 129 }, { "epoch": 44.0, "eval_loss": 1.487107515335083, "eval_runtime": 11.0741, "eval_samples_per_second": 903.011, "eval_steps_per_second": 1.174, "step": 132 }, { "epoch": 45.0, "eval_loss": 1.4803069829940796, "eval_runtime": 10.8489, "eval_samples_per_second": 921.753, "eval_steps_per_second": 1.198, "step": 135 }, { "epoch": 46.0, "eval_loss": 1.4744497537612915, "eval_runtime": 10.8329, "eval_samples_per_second": 923.112, "eval_steps_per_second": 1.2, "step": 138 }, { "epoch": 47.0, "eval_loss": 1.4682186841964722, "eval_runtime": 11.0865, "eval_samples_per_second": 902.0, "eval_steps_per_second": 1.173, "step": 141 }, { "epoch": 48.0, "eval_loss": 1.4509010314941406, "eval_runtime": 10.862, "eval_samples_per_second": 920.64, "eval_steps_per_second": 1.197, "step": 144 }, { "epoch": 49.0, "eval_loss": 1.4542888402938843, "eval_runtime": 11.0884, "eval_samples_per_second": 901.845, "eval_steps_per_second": 1.172, "step": 147 }, { "epoch": 50.0, "eval_loss": 1.455941081047058, "eval_runtime": 10.8598, "eval_samples_per_second": 920.824, "eval_steps_per_second": 1.197, "step": 150 }, { "epoch": 51.0, "eval_loss": 1.4334802627563477, "eval_runtime": 10.8586, "eval_samples_per_second": 920.928, "eval_steps_per_second": 1.197, "step": 153 }, { "epoch": 52.0, "eval_loss": 1.4138292074203491, "eval_runtime": 10.87, "eval_samples_per_second": 919.964, "eval_steps_per_second": 1.196, "step": 156 }, { "epoch": 53.0, "eval_loss": 1.4225151538848877, "eval_runtime": 10.8028, "eval_samples_per_second": 925.687, "eval_steps_per_second": 1.203, "step": 159 }, { "epoch": 54.0, "eval_loss": 1.4350632429122925, "eval_runtime": 11.0684, "eval_samples_per_second": 903.472, "eval_steps_per_second": 1.175, "step": 162 }, { "epoch": 55.0, "eval_loss": 1.3809276819229126, "eval_runtime": 10.7633, "eval_samples_per_second": 929.082, "eval_steps_per_second": 1.208, "step": 165 }, { "epoch": 56.0, "eval_loss": 1.370450735092163, "eval_runtime": 10.785, "eval_samples_per_second": 927.215, "eval_steps_per_second": 1.205, "step": 168 }, { "epoch": 57.0, "eval_loss": 1.3955777883529663, "eval_runtime": 10.895, "eval_samples_per_second": 917.848, "eval_steps_per_second": 1.193, "step": 171 }, { "epoch": 58.0, "eval_loss": 1.368485927581787, "eval_runtime": 10.819, "eval_samples_per_second": 924.297, "eval_steps_per_second": 1.202, "step": 174 }, { "epoch": 59.0, "eval_loss": 1.33143949508667, "eval_runtime": 10.8396, "eval_samples_per_second": 922.543, "eval_steps_per_second": 1.199, "step": 177 }, { "epoch": 60.0, "eval_loss": 1.3212394714355469, "eval_runtime": 10.8092, "eval_samples_per_second": 925.139, "eval_steps_per_second": 1.203, "step": 180 }, { "epoch": 61.0, "eval_loss": 1.3334152698516846, "eval_runtime": 10.9031, "eval_samples_per_second": 917.174, "eval_steps_per_second": 1.192, "step": 183 }, { "epoch": 62.0, "eval_loss": 1.3178024291992188, "eval_runtime": 10.8916, "eval_samples_per_second": 918.135, "eval_steps_per_second": 1.194, "step": 186 }, { "epoch": 63.0, "eval_loss": 1.2850910425186157, "eval_runtime": 10.7826, "eval_samples_per_second": 927.417, "eval_steps_per_second": 1.206, "step": 189 }, { "epoch": 64.0, "eval_loss": 1.2826738357543945, "eval_runtime": 11.0431, "eval_samples_per_second": 905.544, "eval_steps_per_second": 1.177, "step": 192 }, { "epoch": 65.0, "eval_loss": 1.276419758796692, "eval_runtime": 10.8459, "eval_samples_per_second": 922.009, "eval_steps_per_second": 1.199, "step": 195 }, { "epoch": 66.0, "eval_loss": 1.2498911619186401, "eval_runtime": 10.8147, "eval_samples_per_second": 924.671, "eval_steps_per_second": 1.202, "step": 198 }, { "epoch": 67.0, "eval_loss": 1.2304234504699707, "eval_runtime": 10.9437, "eval_samples_per_second": 913.768, "eval_steps_per_second": 1.188, "step": 201 }, { "epoch": 68.0, "eval_loss": 1.2450603246688843, "eval_runtime": 10.8378, "eval_samples_per_second": 922.698, "eval_steps_per_second": 1.2, "step": 204 }, { "epoch": 69.0, "eval_loss": 1.2278028726577759, "eval_runtime": 11.123, "eval_samples_per_second": 899.041, "eval_steps_per_second": 1.169, "step": 207 }, { "epoch": 70.0, "eval_loss": 1.2043875455856323, "eval_runtime": 10.7963, "eval_samples_per_second": 926.245, "eval_steps_per_second": 1.204, "step": 210 }, { "epoch": 71.0, "eval_loss": 1.199507236480713, "eval_runtime": 10.8038, "eval_samples_per_second": 925.601, "eval_steps_per_second": 1.203, "step": 213 }, { "epoch": 72.0, "eval_loss": 1.1909499168395996, "eval_runtime": 10.9182, "eval_samples_per_second": 915.902, "eval_steps_per_second": 1.191, "step": 216 }, { "epoch": 73.0, "eval_loss": 1.172565221786499, "eval_runtime": 10.8472, "eval_samples_per_second": 921.898, "eval_steps_per_second": 1.198, "step": 219 }, { "epoch": 74.0, "eval_loss": 1.1473827362060547, "eval_runtime": 11.0541, "eval_samples_per_second": 904.638, "eval_steps_per_second": 1.176, "step": 222 }, { "epoch": 75.0, "eval_loss": 1.1195847988128662, "eval_runtime": 10.924, "eval_samples_per_second": 915.419, "eval_steps_per_second": 1.19, "step": 225 }, { "epoch": 76.0, "eval_loss": 1.1185705661773682, "eval_runtime": 10.8085, "eval_samples_per_second": 925.199, "eval_steps_per_second": 1.203, "step": 228 }, { "epoch": 77.0, "eval_loss": 1.1256279945373535, "eval_runtime": 11.0649, "eval_samples_per_second": 903.755, "eval_steps_per_second": 1.175, "step": 231 }, { "epoch": 78.0, "eval_loss": 1.0834866762161255, "eval_runtime": 10.8281, "eval_samples_per_second": 923.523, "eval_steps_per_second": 1.201, "step": 234 }, { "epoch": 79.0, "eval_loss": 1.0588449239730835, "eval_runtime": 10.7832, "eval_samples_per_second": 927.372, "eval_steps_per_second": 1.206, "step": 237 }, { "epoch": 80.0, "eval_loss": 1.0453879833221436, "eval_runtime": 10.8835, "eval_samples_per_second": 918.819, "eval_steps_per_second": 1.194, "step": 240 }, { "epoch": 81.0, "eval_loss": 1.0334476232528687, "eval_runtime": 10.8064, "eval_samples_per_second": 925.376, "eval_steps_per_second": 1.203, "step": 243 }, { "epoch": 82.0, "eval_loss": 1.028348684310913, "eval_runtime": 10.9037, "eval_samples_per_second": 917.121, "eval_steps_per_second": 1.192, "step": 246 }, { "epoch": 83.0, "eval_loss": 1.0091253519058228, "eval_runtime": 10.8016, "eval_samples_per_second": 925.788, "eval_steps_per_second": 1.204, "step": 249 }, { "epoch": 84.0, "eval_loss": 0.9820342659950256, "eval_runtime": 11.0485, "eval_samples_per_second": 905.101, "eval_steps_per_second": 1.177, "step": 252 }, { "epoch": 85.0, "eval_loss": 0.9488591551780701, "eval_runtime": 10.8216, "eval_samples_per_second": 924.074, "eval_steps_per_second": 1.201, "step": 255 }, { "epoch": 86.0, "eval_loss": 0.9406836032867432, "eval_runtime": 10.8253, "eval_samples_per_second": 923.765, "eval_steps_per_second": 1.201, "step": 258 }, { "epoch": 87.0, "eval_loss": 0.9392226338386536, "eval_runtime": 10.9531, "eval_samples_per_second": 912.984, "eval_steps_per_second": 1.187, "step": 261 }, { "epoch": 88.0, "eval_loss": 0.9232249855995178, "eval_runtime": 10.835, "eval_samples_per_second": 922.938, "eval_steps_per_second": 1.2, "step": 264 }, { "epoch": 89.0, "eval_loss": 0.902049720287323, "eval_runtime": 10.915, "eval_samples_per_second": 916.173, "eval_steps_per_second": 1.191, "step": 267 }, { "epoch": 90.0, "eval_loss": 0.8934366106987, "eval_runtime": 10.833, "eval_samples_per_second": 923.105, "eval_steps_per_second": 1.2, "step": 270 }, { "epoch": 91.0, "eval_loss": 0.8797369003295898, "eval_runtime": 10.8275, "eval_samples_per_second": 923.576, "eval_steps_per_second": 1.201, "step": 273 }, { "epoch": 92.0, "eval_loss": 0.8834591507911682, "eval_runtime": 10.8227, "eval_samples_per_second": 923.98, "eval_steps_per_second": 1.201, "step": 276 }, { "epoch": 93.0, "eval_loss": 0.8583576083183289, "eval_runtime": 10.8262, "eval_samples_per_second": 923.681, "eval_steps_per_second": 1.201, "step": 279 }, { "epoch": 94.0, "eval_loss": 0.8160658478736877, "eval_runtime": 10.8597, "eval_samples_per_second": 920.834, "eval_steps_per_second": 1.197, "step": 282 }, { "epoch": 95.0, "eval_loss": 0.7998712658882141, "eval_runtime": 10.7874, "eval_samples_per_second": 927.007, "eval_steps_per_second": 1.205, "step": 285 }, { "epoch": 96.0, "eval_loss": 0.8268041014671326, "eval_runtime": 11.0529, "eval_samples_per_second": 904.74, "eval_steps_per_second": 1.176, "step": 288 }, { "epoch": 97.0, "eval_loss": 0.8250266313552856, "eval_runtime": 10.8021, "eval_samples_per_second": 925.746, "eval_steps_per_second": 1.203, "step": 291 }, { "epoch": 98.0, "eval_loss": 0.7785258293151855, "eval_runtime": 10.7519, "eval_samples_per_second": 930.071, "eval_steps_per_second": 1.209, "step": 294 }, { "epoch": 99.0, "eval_loss": 0.7797490358352661, "eval_runtime": 10.8842, "eval_samples_per_second": 918.761, "eval_steps_per_second": 1.194, "step": 297 }, { "epoch": 100.0, "eval_loss": 0.7988857626914978, "eval_runtime": 10.8165, "eval_samples_per_second": 924.512, "eval_steps_per_second": 1.202, "step": 300 }, { "epoch": 101.0, "eval_loss": 0.7674239277839661, "eval_runtime": 10.8984, "eval_samples_per_second": 917.569, "eval_steps_per_second": 1.193, "step": 303 }, { "epoch": 102.0, "eval_loss": 0.7283704280853271, "eval_runtime": 10.7882, "eval_samples_per_second": 926.941, "eval_steps_per_second": 1.205, "step": 306 }, { "epoch": 103.0, "eval_loss": 0.7265847325325012, "eval_runtime": 10.7846, "eval_samples_per_second": 927.248, "eval_steps_per_second": 1.205, "step": 309 }, { "epoch": 104.0, "eval_loss": 0.7218329906463623, "eval_runtime": 10.8432, "eval_samples_per_second": 922.237, "eval_steps_per_second": 1.199, "step": 312 }, { "epoch": 105.0, "eval_loss": 0.7195408344268799, "eval_runtime": 10.775, "eval_samples_per_second": 928.073, "eval_steps_per_second": 1.206, "step": 315 }, { "epoch": 106.0, "eval_loss": 0.720020592212677, "eval_runtime": 11.033, "eval_samples_per_second": 906.372, "eval_steps_per_second": 1.178, "step": 318 }, { "epoch": 107.0, "eval_loss": 0.7001694440841675, "eval_runtime": 10.7937, "eval_samples_per_second": 926.47, "eval_steps_per_second": 1.204, "step": 321 }, { "epoch": 108.0, "eval_loss": 0.6704011559486389, "eval_runtime": 10.9903, "eval_samples_per_second": 909.893, "eval_steps_per_second": 1.183, "step": 324 }, { "epoch": 109.0, "eval_loss": 0.6617050766944885, "eval_runtime": 11.0129, "eval_samples_per_second": 908.026, "eval_steps_per_second": 1.18, "step": 327 }, { "epoch": 110.0, "eval_loss": 0.6687906384468079, "eval_runtime": 10.7417, "eval_samples_per_second": 930.948, "eval_steps_per_second": 1.21, "step": 330 }, { "epoch": 111.0, "eval_loss": 0.6637664437294006, "eval_runtime": 10.8682, "eval_samples_per_second": 920.118, "eval_steps_per_second": 1.196, "step": 333 }, { "epoch": 112.0, "eval_loss": 0.6454914212226868, "eval_runtime": 10.7981, "eval_samples_per_second": 926.087, "eval_steps_per_second": 1.204, "step": 336 }, { "epoch": 113.0, "eval_loss": 0.6308099627494812, "eval_runtime": 10.8495, "eval_samples_per_second": 921.699, "eval_steps_per_second": 1.198, "step": 339 }, { "epoch": 114.0, "eval_loss": 0.6411617398262024, "eval_runtime": 10.9089, "eval_samples_per_second": 916.679, "eval_steps_per_second": 1.192, "step": 342 }, { "epoch": 115.0, "eval_loss": 0.6422205567359924, "eval_runtime": 10.7825, "eval_samples_per_second": 927.428, "eval_steps_per_second": 1.206, "step": 345 }, { "epoch": 116.0, "eval_loss": 0.6265988349914551, "eval_runtime": 10.865, "eval_samples_per_second": 920.387, "eval_steps_per_second": 1.197, "step": 348 }, { "epoch": 117.0, "eval_loss": 0.615440845489502, "eval_runtime": 10.8031, "eval_samples_per_second": 925.657, "eval_steps_per_second": 1.203, "step": 351 }, { "epoch": 118.0, "eval_loss": 0.6053263545036316, "eval_runtime": 10.8755, "eval_samples_per_second": 919.496, "eval_steps_per_second": 1.195, "step": 354 }, { "epoch": 119.0, "eval_loss": 0.6083167791366577, "eval_runtime": 10.861, "eval_samples_per_second": 920.722, "eval_steps_per_second": 1.197, "step": 357 }, { "epoch": 120.0, "eval_loss": 0.607414960861206, "eval_runtime": 10.7885, "eval_samples_per_second": 926.909, "eval_steps_per_second": 1.205, "step": 360 }, { "epoch": 121.0, "eval_loss": 0.6041896343231201, "eval_runtime": 10.8677, "eval_samples_per_second": 920.161, "eval_steps_per_second": 1.196, "step": 363 }, { "epoch": 122.0, "eval_loss": 0.5942515134811401, "eval_runtime": 10.786, "eval_samples_per_second": 927.126, "eval_steps_per_second": 1.205, "step": 366 }, { "epoch": 123.0, "eval_loss": 0.5849249958992004, "eval_runtime": 10.8776, "eval_samples_per_second": 919.318, "eval_steps_per_second": 1.195, "step": 369 }, { "epoch": 124.0, "eval_loss": 0.5770248770713806, "eval_runtime": 10.8665, "eval_samples_per_second": 920.261, "eval_steps_per_second": 1.196, "step": 372 }, { "epoch": 125.0, "eval_loss": 0.5670948028564453, "eval_runtime": 10.7839, "eval_samples_per_second": 927.305, "eval_steps_per_second": 1.205, "step": 375 }, { "epoch": 126.0, "eval_loss": 0.5637474656105042, "eval_runtime": 10.8663, "eval_samples_per_second": 920.276, "eval_steps_per_second": 1.196, "step": 378 }, { "epoch": 127.0, "eval_loss": 0.5640723705291748, "eval_runtime": 10.7903, "eval_samples_per_second": 926.754, "eval_steps_per_second": 1.205, "step": 381 }, { "epoch": 128.0, "eval_loss": 0.5665525197982788, "eval_runtime": 10.8832, "eval_samples_per_second": 918.844, "eval_steps_per_second": 1.194, "step": 384 }, { "epoch": 129.0, "eval_loss": 0.5674743056297302, "eval_runtime": 10.8693, "eval_samples_per_second": 920.019, "eval_steps_per_second": 1.196, "step": 387 }, { "epoch": 130.0, "eval_loss": 0.558768630027771, "eval_runtime": 10.7538, "eval_samples_per_second": 929.908, "eval_steps_per_second": 1.209, "step": 390 }, { "epoch": 131.0, "eval_loss": 0.5466377139091492, "eval_runtime": 10.8966, "eval_samples_per_second": 917.721, "eval_steps_per_second": 1.193, "step": 393 }, { "epoch": 132.0, "eval_loss": 0.5402641296386719, "eval_runtime": 10.8007, "eval_samples_per_second": 925.864, "eval_steps_per_second": 1.204, "step": 396 }, { "epoch": 133.0, "eval_loss": 0.540850043296814, "eval_runtime": 10.8814, "eval_samples_per_second": 918.996, "eval_steps_per_second": 1.195, "step": 399 }, { "epoch": 134.0, "eval_loss": 0.5402743220329285, "eval_runtime": 10.8778, "eval_samples_per_second": 919.303, "eval_steps_per_second": 1.195, "step": 402 }, { "epoch": 135.0, "eval_loss": 0.5400083065032959, "eval_runtime": 10.7631, "eval_samples_per_second": 929.098, "eval_steps_per_second": 1.208, "step": 405 }, { "epoch": 136.0, "eval_loss": 0.5397944450378418, "eval_runtime": 10.8575, "eval_samples_per_second": 921.018, "eval_steps_per_second": 1.197, "step": 408 }, { "epoch": 137.0, "eval_loss": 0.5391473770141602, "eval_runtime": 10.8073, "eval_samples_per_second": 925.299, "eval_steps_per_second": 1.203, "step": 411 }, { "epoch": 138.0, "eval_loss": 0.5366548895835876, "eval_runtime": 10.8743, "eval_samples_per_second": 919.601, "eval_steps_per_second": 1.195, "step": 414 }, { "epoch": 139.0, "eval_loss": 0.5325959920883179, "eval_runtime": 10.8603, "eval_samples_per_second": 920.781, "eval_steps_per_second": 1.197, "step": 417 }, { "epoch": 140.0, "eval_loss": 0.5299940705299377, "eval_runtime": 10.7821, "eval_samples_per_second": 927.467, "eval_steps_per_second": 1.206, "step": 420 }, { "epoch": 141.0, "eval_loss": 0.5305985808372498, "eval_runtime": 10.8496, "eval_samples_per_second": 921.697, "eval_steps_per_second": 1.198, "step": 423 }, { "epoch": 142.0, "eval_loss": 0.5292918682098389, "eval_runtime": 10.7928, "eval_samples_per_second": 926.542, "eval_steps_per_second": 1.205, "step": 426 }, { "epoch": 143.0, "eval_loss": 0.5267909169197083, "eval_runtime": 10.8928, "eval_samples_per_second": 918.04, "eval_steps_per_second": 1.193, "step": 429 }, { "epoch": 144.0, "eval_loss": 0.523544430732727, "eval_runtime": 10.8792, "eval_samples_per_second": 919.184, "eval_steps_per_second": 1.195, "step": 432 }, { "epoch": 145.0, "eval_loss": 0.5205994248390198, "eval_runtime": 10.7887, "eval_samples_per_second": 926.892, "eval_steps_per_second": 1.205, "step": 435 }, { "epoch": 146.0, "eval_loss": 0.519413411617279, "eval_runtime": 10.8656, "eval_samples_per_second": 920.336, "eval_steps_per_second": 1.196, "step": 438 }, { "epoch": 147.0, "eval_loss": 0.519058108329773, "eval_runtime": 10.7991, "eval_samples_per_second": 926.005, "eval_steps_per_second": 1.204, "step": 441 }, { "epoch": 148.0, "eval_loss": 0.5188890099525452, "eval_runtime": 10.8757, "eval_samples_per_second": 919.48, "eval_steps_per_second": 1.195, "step": 444 }, { "epoch": 149.0, "eval_loss": 0.5188961625099182, "eval_runtime": 10.8633, "eval_samples_per_second": 920.533, "eval_steps_per_second": 1.197, "step": 447 }, { "epoch": 150.0, "eval_loss": 0.5188751220703125, "eval_runtime": 10.8003, "eval_samples_per_second": 925.904, "eval_steps_per_second": 1.204, "step": 450 } ], "logging_steps": 500, "max_steps": 450, "num_train_epochs": 150, "save_steps": 500, "total_flos": 8613277286400000.0, "trial_name": null, "trial_params": null }