{ "best_metric": 2.270662546157837, "best_model_checkpoint": "./model_tweets_2020_Q4_50_rand/checkpoint-2240000", "epoch": 10.105220609597433, "eval_steps": 8000, "global_step": 2400000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.03, "eval_loss": 2.3816494941711426, "eval_runtime": 236.9637, "eval_samples_per_second": 844.011, "eval_steps_per_second": 52.751, "step": 8000 }, { "epoch": 0.07, "learning_rate": 4.0726666666666665e-07, "loss": 2.5089, "step": 16000 }, { "epoch": 0.07, "eval_loss": 2.364738702774048, "eval_runtime": 237.3943, "eval_samples_per_second": 842.48, "eval_steps_per_second": 52.655, "step": 16000 }, { "epoch": 0.1, "eval_loss": 2.355194091796875, "eval_runtime": 237.9121, "eval_samples_per_second": 840.647, "eval_steps_per_second": 52.54, "step": 24000 }, { "epoch": 0.13, "learning_rate": 4.0453333333333336e-07, "loss": 2.4989, "step": 32000 }, { "epoch": 0.13, "eval_loss": 2.3528289794921875, "eval_runtime": 238.6586, "eval_samples_per_second": 838.017, "eval_steps_per_second": 52.376, "step": 32000 }, { "epoch": 0.17, "eval_loss": 2.3485841751098633, "eval_runtime": 239.5908, "eval_samples_per_second": 834.756, "eval_steps_per_second": 52.172, "step": 40000 }, { "epoch": 0.2, "learning_rate": 4.018e-07, "loss": 2.4836, "step": 48000 }, { "epoch": 0.2, "eval_loss": 2.34626841545105, "eval_runtime": 238.7656, "eval_samples_per_second": 837.642, "eval_steps_per_second": 52.353, "step": 48000 }, { "epoch": 0.24, "eval_loss": 2.3410613536834717, "eval_runtime": 240.0423, "eval_samples_per_second": 833.186, "eval_steps_per_second": 52.074, "step": 56000 }, { "epoch": 0.27, "learning_rate": 3.9906666666666667e-07, "loss": 2.4904, "step": 64000 }, { "epoch": 0.27, "eval_loss": 2.3394079208374023, "eval_runtime": 239.0867, "eval_samples_per_second": 836.517, "eval_steps_per_second": 52.282, "step": 64000 }, { "epoch": 0.3, "eval_loss": 2.3349857330322266, "eval_runtime": 238.1477, "eval_samples_per_second": 839.815, "eval_steps_per_second": 52.488, "step": 72000 }, { "epoch": 0.34, "learning_rate": 3.963333333333333e-07, "loss": 2.4733, "step": 80000 }, { "epoch": 0.34, "eval_loss": 2.330885410308838, "eval_runtime": 239.5566, "eval_samples_per_second": 834.876, "eval_steps_per_second": 52.18, "step": 80000 }, { "epoch": 0.37, "eval_loss": 2.3288557529449463, "eval_runtime": 239.3525, "eval_samples_per_second": 835.588, "eval_steps_per_second": 52.224, "step": 88000 }, { "epoch": 0.4, "learning_rate": 3.936e-07, "loss": 2.4675, "step": 96000 }, { "epoch": 0.4, "eval_loss": 2.3381102085113525, "eval_runtime": 239.1998, "eval_samples_per_second": 836.121, "eval_steps_per_second": 52.258, "step": 96000 }, { "epoch": 0.44, "eval_loss": 2.331658363342285, "eval_runtime": 237.7084, "eval_samples_per_second": 841.367, "eval_steps_per_second": 52.585, "step": 104000 }, { "epoch": 0.47, "learning_rate": 3.908666666666667e-07, "loss": 2.4762, "step": 112000 }, { "epoch": 0.47, "eval_loss": 2.3218166828155518, "eval_runtime": 239.0796, "eval_samples_per_second": 836.541, "eval_steps_per_second": 52.284, "step": 112000 }, { "epoch": 0.51, "eval_loss": 2.326749086380005, "eval_runtime": 239.0317, "eval_samples_per_second": 836.709, "eval_steps_per_second": 52.294, "step": 120000 }, { "epoch": 0.54, "learning_rate": 3.8813333333333334e-07, "loss": 2.4616, "step": 128000 }, { "epoch": 0.54, "eval_loss": 2.3240551948547363, "eval_runtime": 238.0739, "eval_samples_per_second": 840.075, "eval_steps_per_second": 52.505, "step": 128000 }, { "epoch": 0.57, "eval_loss": 2.3280670642852783, "eval_runtime": 238.6608, "eval_samples_per_second": 838.009, "eval_steps_per_second": 52.376, "step": 136000 }, { "epoch": 0.61, "learning_rate": 3.854e-07, "loss": 2.4601, "step": 144000 }, { "epoch": 0.61, "eval_loss": 2.315213680267334, "eval_runtime": 238.6484, "eval_samples_per_second": 838.053, "eval_steps_per_second": 52.378, "step": 144000 }, { "epoch": 0.64, "eval_loss": 2.3242688179016113, "eval_runtime": 239.0187, "eval_samples_per_second": 836.755, "eval_steps_per_second": 52.297, "step": 152000 }, { "epoch": 0.67, "learning_rate": 3.8266666666666665e-07, "loss": 2.4563, "step": 160000 }, { "epoch": 0.67, "eval_loss": 2.3201961517333984, "eval_runtime": 240.1294, "eval_samples_per_second": 832.884, "eval_steps_per_second": 52.055, "step": 160000 }, { "epoch": 0.71, "eval_loss": 2.3193206787109375, "eval_runtime": 241.1825, "eval_samples_per_second": 829.247, "eval_steps_per_second": 51.828, "step": 168000 }, { "epoch": 0.74, "learning_rate": 3.799333333333333e-07, "loss": 2.459, "step": 176000 }, { "epoch": 0.74, "eval_loss": 2.318166732788086, "eval_runtime": 238.5889, "eval_samples_per_second": 838.262, "eval_steps_per_second": 52.391, "step": 176000 }, { "epoch": 0.77, "eval_loss": 2.324726104736328, "eval_runtime": 238.4567, "eval_samples_per_second": 838.727, "eval_steps_per_second": 52.42, "step": 184000 }, { "epoch": 0.81, "learning_rate": 3.772e-07, "loss": 2.4639, "step": 192000 }, { "epoch": 0.81, "eval_loss": 2.320105791091919, "eval_runtime": 238.6268, "eval_samples_per_second": 838.129, "eval_steps_per_second": 52.383, "step": 192000 }, { "epoch": 0.84, "eval_loss": 2.3243095874786377, "eval_runtime": 239.9922, "eval_samples_per_second": 833.36, "eval_steps_per_second": 52.085, "step": 200000 }, { "epoch": 0.88, "learning_rate": 3.7446666666666667e-07, "loss": 2.4561, "step": 208000 }, { "epoch": 0.88, "eval_loss": 2.321760416030884, "eval_runtime": 239.3254, "eval_samples_per_second": 835.682, "eval_steps_per_second": 52.23, "step": 208000 }, { "epoch": 0.91, "eval_loss": 2.3137271404266357, "eval_runtime": 240.8808, "eval_samples_per_second": 830.286, "eval_steps_per_second": 51.893, "step": 216000 }, { "epoch": 0.94, "learning_rate": 3.7173333333333333e-07, "loss": 2.4556, "step": 224000 }, { "epoch": 0.94, "eval_loss": 2.318033456802368, "eval_runtime": 239.3207, "eval_samples_per_second": 835.699, "eval_steps_per_second": 52.231, "step": 224000 }, { "epoch": 0.98, "eval_loss": 2.314727783203125, "eval_runtime": 238.4213, "eval_samples_per_second": 838.851, "eval_steps_per_second": 52.428, "step": 232000 }, { "epoch": 1.01, "learning_rate": 3.69e-07, "loss": 2.4573, "step": 240000 }, { "epoch": 1.01, "eval_loss": 2.309976100921631, "eval_runtime": 238.672, "eval_samples_per_second": 837.97, "eval_steps_per_second": 52.373, "step": 240000 }, { "epoch": 1.04, "eval_loss": 2.311811923980713, "eval_runtime": 239.3065, "eval_samples_per_second": 835.748, "eval_steps_per_second": 52.234, "step": 248000 }, { "epoch": 1.08, "learning_rate": 3.6626666666666664e-07, "loss": 2.4516, "step": 256000 }, { "epoch": 1.08, "eval_loss": 2.315763235092163, "eval_runtime": 239.6224, "eval_samples_per_second": 834.646, "eval_steps_per_second": 52.165, "step": 256000 }, { "epoch": 1.11, "eval_loss": 2.3132565021514893, "eval_runtime": 241.721, "eval_samples_per_second": 827.4, "eval_steps_per_second": 51.713, "step": 264000 }, { "epoch": 1.15, "learning_rate": 3.6353333333333335e-07, "loss": 2.4561, "step": 272000 }, { "epoch": 1.15, "eval_loss": 2.3065385818481445, "eval_runtime": 241.1529, "eval_samples_per_second": 829.349, "eval_steps_per_second": 51.834, "step": 272000 }, { "epoch": 1.18, "eval_loss": 2.318328857421875, "eval_runtime": 240.9143, "eval_samples_per_second": 830.171, "eval_steps_per_second": 51.886, "step": 280000 }, { "epoch": 1.21, "learning_rate": 3.608e-07, "loss": 2.4476, "step": 288000 }, { "epoch": 1.21, "eval_loss": 2.310638666152954, "eval_runtime": 240.8865, "eval_samples_per_second": 830.267, "eval_steps_per_second": 51.892, "step": 288000 }, { "epoch": 1.25, "eval_loss": 2.3131144046783447, "eval_runtime": 238.8251, "eval_samples_per_second": 837.433, "eval_steps_per_second": 52.34, "step": 296000 }, { "epoch": 1.28, "learning_rate": 3.5806666666666666e-07, "loss": 2.4503, "step": 304000 }, { "epoch": 1.28, "eval_loss": 2.3103673458099365, "eval_runtime": 238.9913, "eval_samples_per_second": 836.851, "eval_steps_per_second": 52.303, "step": 304000 }, { "epoch": 1.31, "eval_loss": 2.310051679611206, "eval_runtime": 239.4298, "eval_samples_per_second": 835.318, "eval_steps_per_second": 52.207, "step": 312000 }, { "epoch": 1.35, "learning_rate": 3.553333333333333e-07, "loss": 2.4495, "step": 320000 }, { "epoch": 1.35, "eval_loss": 2.3085968494415283, "eval_runtime": 239.7653, "eval_samples_per_second": 834.149, "eval_steps_per_second": 52.134, "step": 320000 }, { "epoch": 1.38, "eval_loss": 2.3057291507720947, "eval_runtime": 240.7073, "eval_samples_per_second": 830.885, "eval_steps_per_second": 51.93, "step": 328000 }, { "epoch": 1.41, "learning_rate": 3.5259999999999997e-07, "loss": 2.4534, "step": 336000 }, { "epoch": 1.41, "eval_loss": 2.3085904121398926, "eval_runtime": 240.303, "eval_samples_per_second": 832.282, "eval_steps_per_second": 52.018, "step": 336000 }, { "epoch": 1.45, "eval_loss": 2.309332847595215, "eval_runtime": 240.8676, "eval_samples_per_second": 830.332, "eval_steps_per_second": 51.896, "step": 344000 }, { "epoch": 1.48, "learning_rate": 3.498666666666667e-07, "loss": 2.4486, "step": 352000 }, { "epoch": 1.48, "eval_loss": 2.3018343448638916, "eval_runtime": 241.3997, "eval_samples_per_second": 828.501, "eval_steps_per_second": 51.781, "step": 352000 }, { "epoch": 1.52, "eval_loss": 2.305995464324951, "eval_runtime": 239.2635, "eval_samples_per_second": 835.898, "eval_steps_per_second": 52.244, "step": 360000 }, { "epoch": 1.55, "learning_rate": 3.4713333333333333e-07, "loss": 2.4457, "step": 368000 }, { "epoch": 1.55, "eval_loss": 2.3082966804504395, "eval_runtime": 239.5152, "eval_samples_per_second": 835.02, "eval_steps_per_second": 52.189, "step": 368000 }, { "epoch": 1.58, "eval_loss": 2.3109591007232666, "eval_runtime": 240.8072, "eval_samples_per_second": 830.54, "eval_steps_per_second": 51.909, "step": 376000 }, { "epoch": 1.62, "learning_rate": 3.444e-07, "loss": 2.4443, "step": 384000 }, { "epoch": 1.62, "eval_loss": 2.297455310821533, "eval_runtime": 240.6668, "eval_samples_per_second": 831.024, "eval_steps_per_second": 51.939, "step": 384000 }, { "epoch": 1.65, "eval_loss": 2.300872564315796, "eval_runtime": 240.6097, "eval_samples_per_second": 831.222, "eval_steps_per_second": 51.951, "step": 392000 }, { "epoch": 1.68, "learning_rate": 3.416666666666667e-07, "loss": 2.4405, "step": 400000 }, { "epoch": 1.68, "eval_loss": 2.3067097663879395, "eval_runtime": 240.2686, "eval_samples_per_second": 832.402, "eval_steps_per_second": 52.025, "step": 400000 }, { "epoch": 1.72, "eval_loss": 2.30265212059021, "eval_runtime": 240.64, "eval_samples_per_second": 831.117, "eval_steps_per_second": 51.945, "step": 408000 }, { "epoch": 1.75, "learning_rate": 3.3893333333333335e-07, "loss": 2.4531, "step": 416000 }, { "epoch": 1.75, "eval_loss": 2.3050363063812256, "eval_runtime": 240.3135, "eval_samples_per_second": 832.246, "eval_steps_per_second": 52.015, "step": 416000 }, { "epoch": 1.79, "eval_loss": 2.302565574645996, "eval_runtime": 239.3844, "eval_samples_per_second": 835.476, "eval_steps_per_second": 52.217, "step": 424000 }, { "epoch": 1.82, "learning_rate": 3.3619999999999995e-07, "loss": 2.4539, "step": 432000 }, { "epoch": 1.82, "eval_loss": 2.2928755283355713, "eval_runtime": 241.5914, "eval_samples_per_second": 827.844, "eval_steps_per_second": 51.74, "step": 432000 }, { "epoch": 1.85, "eval_loss": 2.305102825164795, "eval_runtime": 241.5249, "eval_samples_per_second": 828.072, "eval_steps_per_second": 51.754, "step": 440000 }, { "epoch": 1.89, "learning_rate": 3.3346666666666666e-07, "loss": 2.4499, "step": 448000 }, { "epoch": 1.89, "eval_loss": 2.3034677505493164, "eval_runtime": 241.4767, "eval_samples_per_second": 828.237, "eval_steps_per_second": 51.765, "step": 448000 }, { "epoch": 1.92, "eval_loss": 2.301123857498169, "eval_runtime": 239.2116, "eval_samples_per_second": 836.08, "eval_steps_per_second": 52.255, "step": 456000 }, { "epoch": 1.95, "learning_rate": 3.307333333333333e-07, "loss": 2.4401, "step": 464000 }, { "epoch": 1.95, "eval_loss": 2.291990280151367, "eval_runtime": 241.865, "eval_samples_per_second": 826.907, "eval_steps_per_second": 51.682, "step": 464000 }, { "epoch": 1.99, "eval_loss": 2.299881935119629, "eval_runtime": 239.8579, "eval_samples_per_second": 833.827, "eval_steps_per_second": 52.114, "step": 472000 }, { "epoch": 2.02, "learning_rate": 3.28e-07, "loss": 2.4401, "step": 480000 }, { "epoch": 2.02, "eval_loss": 2.303424119949341, "eval_runtime": 240.1835, "eval_samples_per_second": 832.697, "eval_steps_per_second": 52.044, "step": 480000 }, { "epoch": 2.05, "eval_loss": 2.30208683013916, "eval_runtime": 239.7131, "eval_samples_per_second": 834.331, "eval_steps_per_second": 52.146, "step": 488000 }, { "epoch": 2.09, "learning_rate": 3.252666666666667e-07, "loss": 2.4433, "step": 496000 }, { "epoch": 2.09, "eval_loss": 2.3102383613586426, "eval_runtime": 239.6539, "eval_samples_per_second": 834.537, "eval_steps_per_second": 52.159, "step": 496000 }, { "epoch": 2.12, "eval_loss": 2.2985267639160156, "eval_runtime": 239.3303, "eval_samples_per_second": 835.665, "eval_steps_per_second": 52.229, "step": 504000 }, { "epoch": 2.16, "learning_rate": 3.2253333333333334e-07, "loss": 2.4445, "step": 512000 }, { "epoch": 2.16, "eval_loss": 2.3018200397491455, "eval_runtime": 239.5684, "eval_samples_per_second": 834.835, "eval_steps_per_second": 52.177, "step": 512000 }, { "epoch": 2.19, "eval_loss": 2.2995855808258057, "eval_runtime": 242.6094, "eval_samples_per_second": 824.37, "eval_steps_per_second": 51.523, "step": 520000 }, { "epoch": 2.22, "learning_rate": 3.198e-07, "loss": 2.4379, "step": 528000 }, { "epoch": 2.22, "eval_loss": 2.3006343841552734, "eval_runtime": 241.6485, "eval_samples_per_second": 827.648, "eval_steps_per_second": 51.728, "step": 528000 }, { "epoch": 2.26, "eval_loss": 2.2969799041748047, "eval_runtime": 240.3777, "eval_samples_per_second": 832.024, "eval_steps_per_second": 52.001, "step": 536000 }, { "epoch": 2.29, "learning_rate": 3.1706666666666665e-07, "loss": 2.4454, "step": 544000 }, { "epoch": 2.29, "eval_loss": 2.30135178565979, "eval_runtime": 241.5787, "eval_samples_per_second": 827.887, "eval_steps_per_second": 51.743, "step": 544000 }, { "epoch": 2.32, "eval_loss": 2.2992091178894043, "eval_runtime": 241.4245, "eval_samples_per_second": 828.416, "eval_steps_per_second": 51.776, "step": 552000 }, { "epoch": 2.36, "learning_rate": 3.1433333333333336e-07, "loss": 2.4457, "step": 560000 }, { "epoch": 2.36, "eval_loss": 2.2961883544921875, "eval_runtime": 242.857, "eval_samples_per_second": 823.53, "eval_steps_per_second": 51.471, "step": 560000 }, { "epoch": 2.39, "eval_loss": 2.300924301147461, "eval_runtime": 253.3263, "eval_samples_per_second": 789.495, "eval_steps_per_second": 49.343, "step": 568000 }, { "epoch": 2.43, "learning_rate": 3.116e-07, "loss": 2.4354, "step": 576000 }, { "epoch": 2.43, "eval_loss": 2.2960214614868164, "eval_runtime": 251.8236, "eval_samples_per_second": 794.207, "eval_steps_per_second": 49.638, "step": 576000 }, { "epoch": 2.46, "eval_loss": 2.3008430004119873, "eval_runtime": 255.1, "eval_samples_per_second": 784.006, "eval_steps_per_second": 49.0, "step": 584000 }, { "epoch": 2.49, "learning_rate": 3.0886666666666667e-07, "loss": 2.4361, "step": 592000 }, { "epoch": 2.49, "eval_loss": 2.289783239364624, "eval_runtime": 253.1447, "eval_samples_per_second": 790.062, "eval_steps_per_second": 49.379, "step": 592000 }, { "epoch": 2.53, "eval_loss": 2.306014060974121, "eval_runtime": 253.3664, "eval_samples_per_second": 789.371, "eval_steps_per_second": 49.336, "step": 600000 }, { "epoch": 2.56, "learning_rate": 3.061333333333333e-07, "loss": 2.4377, "step": 608000 }, { "epoch": 2.56, "eval_loss": 2.298966884613037, "eval_runtime": 254.531, "eval_samples_per_second": 785.759, "eval_steps_per_second": 49.11, "step": 608000 }, { "epoch": 2.59, "eval_loss": 2.298924684524536, "eval_runtime": 253.6979, "eval_samples_per_second": 788.339, "eval_steps_per_second": 49.271, "step": 616000 }, { "epoch": 2.63, "learning_rate": 3.034e-07, "loss": 2.4416, "step": 624000 }, { "epoch": 2.63, "eval_loss": 2.296922206878662, "eval_runtime": 252.7348, "eval_samples_per_second": 791.343, "eval_steps_per_second": 49.459, "step": 624000 }, { "epoch": 2.66, "eval_loss": 2.2932701110839844, "eval_runtime": 249.9057, "eval_samples_per_second": 800.302, "eval_steps_per_second": 50.019, "step": 632000 }, { "epoch": 2.69, "learning_rate": 3.0066666666666663e-07, "loss": 2.434, "step": 640000 }, { "epoch": 2.69, "eval_loss": 2.29982852935791, "eval_runtime": 250.7868, "eval_samples_per_second": 797.49, "eval_steps_per_second": 49.843, "step": 640000 }, { "epoch": 2.73, "eval_loss": 2.294802665710449, "eval_runtime": 251.6635, "eval_samples_per_second": 794.712, "eval_steps_per_second": 49.669, "step": 648000 }, { "epoch": 2.76, "learning_rate": 2.9793333333333334e-07, "loss": 2.433, "step": 656000 }, { "epoch": 2.76, "eval_loss": 2.2891652584075928, "eval_runtime": 250.4466, "eval_samples_per_second": 798.574, "eval_steps_per_second": 49.911, "step": 656000 }, { "epoch": 2.8, "eval_loss": 2.292858362197876, "eval_runtime": 254.5309, "eval_samples_per_second": 785.759, "eval_steps_per_second": 49.11, "step": 664000 }, { "epoch": 2.83, "learning_rate": 2.952e-07, "loss": 2.44, "step": 672000 }, { "epoch": 2.83, "eval_loss": 2.2973103523254395, "eval_runtime": 250.641, "eval_samples_per_second": 797.954, "eval_steps_per_second": 49.872, "step": 672000 }, { "epoch": 2.86, "eval_loss": 2.292598009109497, "eval_runtime": 250.7654, "eval_samples_per_second": 797.558, "eval_steps_per_second": 49.847, "step": 680000 }, { "epoch": 2.9, "learning_rate": 2.9246666666666665e-07, "loss": 2.4291, "step": 688000 }, { "epoch": 2.9, "eval_loss": 2.298957109451294, "eval_runtime": 249.6027, "eval_samples_per_second": 801.273, "eval_steps_per_second": 50.08, "step": 688000 }, { "epoch": 2.93, "eval_loss": 2.2936558723449707, "eval_runtime": 254.0075, "eval_samples_per_second": 787.378, "eval_steps_per_second": 49.211, "step": 696000 }, { "epoch": 2.96, "learning_rate": 2.897333333333333e-07, "loss": 2.4336, "step": 704000 }, { "epoch": 2.96, "eval_loss": 2.2894132137298584, "eval_runtime": 251.6948, "eval_samples_per_second": 794.613, "eval_steps_per_second": 49.663, "step": 704000 }, { "epoch": 3.0, "eval_loss": 2.295807123184204, "eval_runtime": 250.3917, "eval_samples_per_second": 798.748, "eval_steps_per_second": 49.922, "step": 712000 }, { "epoch": 3.03, "learning_rate": 2.8699999999999996e-07, "loss": 2.439, "step": 720000 }, { "epoch": 3.03, "eval_loss": 2.295633554458618, "eval_runtime": 249.5836, "eval_samples_per_second": 801.335, "eval_steps_per_second": 50.083, "step": 720000 }, { "epoch": 3.07, "eval_loss": 2.2927510738372803, "eval_runtime": 252.6433, "eval_samples_per_second": 791.63, "eval_steps_per_second": 49.477, "step": 728000 }, { "epoch": 3.1, "learning_rate": 2.8426666666666667e-07, "loss": 2.4405, "step": 736000 }, { "epoch": 3.1, "eval_loss": 2.295588970184326, "eval_runtime": 254.3928, "eval_samples_per_second": 786.186, "eval_steps_per_second": 49.137, "step": 736000 }, { "epoch": 3.13, "eval_loss": 2.290548324584961, "eval_runtime": 251.5538, "eval_samples_per_second": 795.058, "eval_steps_per_second": 49.691, "step": 744000 }, { "epoch": 3.17, "learning_rate": 2.815333333333333e-07, "loss": 2.4332, "step": 752000 }, { "epoch": 3.17, "eval_loss": 2.2921102046966553, "eval_runtime": 250.7706, "eval_samples_per_second": 797.542, "eval_steps_per_second": 49.846, "step": 752000 }, { "epoch": 3.2, "eval_loss": 2.2907326221466064, "eval_runtime": 252.731, "eval_samples_per_second": 791.355, "eval_steps_per_second": 49.46, "step": 760000 }, { "epoch": 3.23, "learning_rate": 2.7880000000000003e-07, "loss": 2.4353, "step": 768000 }, { "epoch": 3.23, "eval_loss": 2.2879087924957275, "eval_runtime": 249.8593, "eval_samples_per_second": 800.45, "eval_steps_per_second": 50.028, "step": 768000 }, { "epoch": 3.27, "eval_loss": 2.292853355407715, "eval_runtime": 255.5473, "eval_samples_per_second": 782.634, "eval_steps_per_second": 48.915, "step": 776000 }, { "epoch": 3.3, "learning_rate": 2.7606666666666664e-07, "loss": 2.4273, "step": 784000 }, { "epoch": 3.3, "eval_loss": 2.29532527923584, "eval_runtime": 249.121, "eval_samples_per_second": 802.823, "eval_steps_per_second": 50.176, "step": 784000 }, { "epoch": 3.33, "eval_loss": 2.2917468547821045, "eval_runtime": 252.7811, "eval_samples_per_second": 791.198, "eval_steps_per_second": 49.45, "step": 792000 }, { "epoch": 3.37, "learning_rate": 2.733333333333333e-07, "loss": 2.4233, "step": 800000 }, { "epoch": 3.37, "eval_loss": 2.294677734375, "eval_runtime": 251.4379, "eval_samples_per_second": 795.425, "eval_steps_per_second": 49.714, "step": 800000 }, { "epoch": 3.4, "eval_loss": 2.294262170791626, "eval_runtime": 249.692, "eval_samples_per_second": 800.987, "eval_steps_per_second": 50.062, "step": 808000 }, { "epoch": 3.44, "learning_rate": 2.706e-07, "loss": 2.4324, "step": 816000 }, { "epoch": 3.44, "eval_loss": 2.2940258979797363, "eval_runtime": 251.4645, "eval_samples_per_second": 795.341, "eval_steps_per_second": 49.709, "step": 816000 }, { "epoch": 3.47, "eval_loss": 2.291130781173706, "eval_runtime": 250.9475, "eval_samples_per_second": 796.979, "eval_steps_per_second": 49.811, "step": 824000 }, { "epoch": 3.5, "learning_rate": 2.6786666666666666e-07, "loss": 2.4461, "step": 832000 }, { "epoch": 3.5, "eval_loss": 2.2919955253601074, "eval_runtime": 251.5475, "eval_samples_per_second": 795.078, "eval_steps_per_second": 49.692, "step": 832000 }, { "epoch": 3.54, "eval_loss": 2.291116952896118, "eval_runtime": 247.564, "eval_samples_per_second": 807.872, "eval_steps_per_second": 50.492, "step": 840000 }, { "epoch": 3.57, "learning_rate": 2.651333333333333e-07, "loss": 2.4267, "step": 848000 }, { "epoch": 3.57, "eval_loss": 2.294036865234375, "eval_runtime": 248.8076, "eval_samples_per_second": 803.834, "eval_steps_per_second": 50.24, "step": 848000 }, { "epoch": 3.6, "eval_loss": 2.2889564037323, "eval_runtime": 250.7173, "eval_samples_per_second": 797.711, "eval_steps_per_second": 49.857, "step": 856000 }, { "epoch": 3.64, "learning_rate": 2.624e-07, "loss": 2.4313, "step": 864000 }, { "epoch": 3.64, "eval_loss": 2.2913272380828857, "eval_runtime": 251.8602, "eval_samples_per_second": 794.091, "eval_steps_per_second": 49.631, "step": 864000 }, { "epoch": 3.67, "eval_loss": 2.296712875366211, "eval_runtime": 250.3469, "eval_samples_per_second": 798.891, "eval_steps_per_second": 49.931, "step": 872000 }, { "epoch": 3.71, "learning_rate": 2.596666666666667e-07, "loss": 2.4388, "step": 880000 }, { "epoch": 3.71, "eval_loss": 2.29068660736084, "eval_runtime": 252.8984, "eval_samples_per_second": 790.831, "eval_steps_per_second": 49.427, "step": 880000 }, { "epoch": 3.74, "eval_loss": 2.295198440551758, "eval_runtime": 250.9204, "eval_samples_per_second": 797.065, "eval_steps_per_second": 49.817, "step": 888000 }, { "epoch": 3.77, "learning_rate": 2.5693333333333333e-07, "loss": 2.4326, "step": 896000 }, { "epoch": 3.77, "eval_loss": 2.287299394607544, "eval_runtime": 250.3015, "eval_samples_per_second": 799.036, "eval_steps_per_second": 49.94, "step": 896000 }, { "epoch": 3.81, "eval_loss": 2.287095308303833, "eval_runtime": 251.9856, "eval_samples_per_second": 793.696, "eval_steps_per_second": 49.606, "step": 904000 }, { "epoch": 3.84, "learning_rate": 2.542e-07, "loss": 2.4312, "step": 912000 }, { "epoch": 3.84, "eval_loss": 2.2880072593688965, "eval_runtime": 250.0793, "eval_samples_per_second": 799.746, "eval_steps_per_second": 49.984, "step": 912000 }, { "epoch": 3.87, "eval_loss": 2.2941174507141113, "eval_runtime": 250.5896, "eval_samples_per_second": 798.118, "eval_steps_per_second": 49.882, "step": 920000 }, { "epoch": 3.91, "learning_rate": 2.5146666666666664e-07, "loss": 2.4398, "step": 928000 }, { "epoch": 3.91, "eval_loss": 2.292538642883301, "eval_runtime": 248.6711, "eval_samples_per_second": 804.275, "eval_steps_per_second": 50.267, "step": 928000 }, { "epoch": 3.94, "eval_loss": 2.296539545059204, "eval_runtime": 250.5232, "eval_samples_per_second": 798.329, "eval_steps_per_second": 49.896, "step": 936000 }, { "epoch": 3.97, "learning_rate": 2.4873333333333335e-07, "loss": 2.441, "step": 944000 }, { "epoch": 3.97, "eval_loss": 2.289044141769409, "eval_runtime": 251.7273, "eval_samples_per_second": 794.511, "eval_steps_per_second": 49.657, "step": 944000 }, { "epoch": 4.01, "eval_loss": 2.2945752143859863, "eval_runtime": 250.3966, "eval_samples_per_second": 798.733, "eval_steps_per_second": 49.921, "step": 952000 }, { "epoch": 4.04, "learning_rate": 2.46e-07, "loss": 2.4345, "step": 960000 }, { "epoch": 4.04, "eval_loss": 2.291003704071045, "eval_runtime": 250.1466, "eval_samples_per_second": 799.531, "eval_steps_per_second": 49.971, "step": 960000 }, { "epoch": 4.08, "eval_loss": 2.279160737991333, "eval_runtime": 251.3138, "eval_samples_per_second": 795.818, "eval_steps_per_second": 49.739, "step": 968000 }, { "epoch": 4.11, "learning_rate": 2.4326666666666666e-07, "loss": 2.4332, "step": 976000 }, { "epoch": 4.11, "eval_loss": 2.285647392272949, "eval_runtime": 251.6666, "eval_samples_per_second": 794.702, "eval_steps_per_second": 49.669, "step": 976000 }, { "epoch": 4.14, "eval_loss": 2.2878894805908203, "eval_runtime": 249.5029, "eval_samples_per_second": 801.594, "eval_steps_per_second": 50.1, "step": 984000 }, { "epoch": 4.18, "learning_rate": 2.405333333333333e-07, "loss": 2.4375, "step": 992000 }, { "epoch": 4.18, "eval_loss": 2.2861104011535645, "eval_runtime": 249.5689, "eval_samples_per_second": 801.382, "eval_steps_per_second": 50.086, "step": 992000 }, { "epoch": 4.21, "eval_loss": 2.2892367839813232, "eval_runtime": 250.1781, "eval_samples_per_second": 799.43, "eval_steps_per_second": 49.964, "step": 1000000 }, { "epoch": 4.24, "learning_rate": 2.3779999999999997e-07, "loss": 2.4282, "step": 1008000 }, { "epoch": 4.24, "eval_loss": 2.2894413471221924, "eval_runtime": 252.3508, "eval_samples_per_second": 792.548, "eval_steps_per_second": 49.534, "step": 1008000 }, { "epoch": 4.28, "eval_loss": 2.290153980255127, "eval_runtime": 250.4413, "eval_samples_per_second": 798.59, "eval_steps_per_second": 49.912, "step": 1016000 }, { "epoch": 4.31, "learning_rate": 2.3506666666666668e-07, "loss": 2.4231, "step": 1024000 }, { "epoch": 4.31, "eval_loss": 2.282951831817627, "eval_runtime": 251.0777, "eval_samples_per_second": 796.566, "eval_steps_per_second": 49.785, "step": 1024000 }, { "epoch": 4.35, "eval_loss": 2.294783353805542, "eval_runtime": 249.9563, "eval_samples_per_second": 800.14, "eval_steps_per_second": 50.009, "step": 1032000 }, { "epoch": 4.38, "learning_rate": 2.3233333333333334e-07, "loss": 2.4299, "step": 1040000 }, { "epoch": 4.38, "eval_loss": 2.2915213108062744, "eval_runtime": 251.5141, "eval_samples_per_second": 795.184, "eval_steps_per_second": 49.699, "step": 1040000 }, { "epoch": 4.41, "eval_loss": 2.2921574115753174, "eval_runtime": 250.6578, "eval_samples_per_second": 797.9, "eval_steps_per_second": 49.869, "step": 1048000 }, { "epoch": 4.45, "learning_rate": 2.2960000000000002e-07, "loss": 2.4353, "step": 1056000 }, { "epoch": 4.45, "eval_loss": 2.287623882293701, "eval_runtime": 249.9183, "eval_samples_per_second": 800.262, "eval_steps_per_second": 50.016, "step": 1056000 }, { "epoch": 4.48, "eval_loss": 2.2892725467681885, "eval_runtime": 249.4102, "eval_samples_per_second": 801.892, "eval_steps_per_second": 50.118, "step": 1064000 }, { "epoch": 4.51, "learning_rate": 2.2686666666666667e-07, "loss": 2.4308, "step": 1072000 }, { "epoch": 4.51, "eval_loss": 2.292046070098877, "eval_runtime": 247.5504, "eval_samples_per_second": 807.916, "eval_steps_per_second": 50.495, "step": 1072000 }, { "epoch": 4.55, "eval_loss": 2.285968780517578, "eval_runtime": 247.5335, "eval_samples_per_second": 807.971, "eval_steps_per_second": 50.498, "step": 1080000 }, { "epoch": 4.58, "learning_rate": 2.2413333333333333e-07, "loss": 2.4358, "step": 1088000 }, { "epoch": 4.58, "eval_loss": 2.2907297611236572, "eval_runtime": 248.9275, "eval_samples_per_second": 803.447, "eval_steps_per_second": 50.215, "step": 1088000 }, { "epoch": 4.61, "eval_loss": 2.2807979583740234, "eval_runtime": 249.3116, "eval_samples_per_second": 802.209, "eval_steps_per_second": 50.138, "step": 1096000 }, { "epoch": 4.65, "learning_rate": 2.214e-07, "loss": 2.4341, "step": 1104000 }, { "epoch": 4.65, "eval_loss": 2.290224313735962, "eval_runtime": 249.5198, "eval_samples_per_second": 801.54, "eval_steps_per_second": 50.096, "step": 1104000 }, { "epoch": 4.68, "eval_loss": 2.281525135040283, "eval_runtime": 249.0717, "eval_samples_per_second": 802.982, "eval_steps_per_second": 50.186, "step": 1112000 }, { "epoch": 4.72, "learning_rate": 2.1866666666666667e-07, "loss": 2.4315, "step": 1120000 }, { "epoch": 4.72, "eval_loss": 2.296067237854004, "eval_runtime": 246.4788, "eval_samples_per_second": 811.429, "eval_steps_per_second": 50.714, "step": 1120000 }, { "epoch": 4.75, "eval_loss": 2.288472890853882, "eval_runtime": 243.6257, "eval_samples_per_second": 820.932, "eval_steps_per_second": 51.308, "step": 1128000 }, { "epoch": 4.78, "learning_rate": 2.1593333333333332e-07, "loss": 2.434, "step": 1136000 }, { "epoch": 4.78, "eval_loss": 2.2916722297668457, "eval_runtime": 242.6981, "eval_samples_per_second": 824.069, "eval_steps_per_second": 51.504, "step": 1136000 }, { "epoch": 4.82, "eval_loss": 2.285139560699463, "eval_runtime": 243.3147, "eval_samples_per_second": 821.981, "eval_steps_per_second": 51.374, "step": 1144000 }, { "epoch": 4.85, "learning_rate": 2.132e-07, "loss": 2.4324, "step": 1152000 }, { "epoch": 4.85, "eval_loss": 2.2837140560150146, "eval_runtime": 243.7667, "eval_samples_per_second": 820.457, "eval_steps_per_second": 51.279, "step": 1152000 }, { "epoch": 4.88, "eval_loss": 2.288276195526123, "eval_runtime": 243.0553, "eval_samples_per_second": 822.858, "eval_steps_per_second": 51.429, "step": 1160000 }, { "epoch": 4.92, "learning_rate": 2.1046666666666666e-07, "loss": 2.4297, "step": 1168000 }, { "epoch": 4.92, "eval_loss": 2.282362222671509, "eval_runtime": 243.4119, "eval_samples_per_second": 821.652, "eval_steps_per_second": 51.353, "step": 1168000 }, { "epoch": 4.95, "eval_loss": 2.283245086669922, "eval_runtime": 242.8004, "eval_samples_per_second": 823.722, "eval_steps_per_second": 51.483, "step": 1176000 }, { "epoch": 4.99, "learning_rate": 2.0773333333333334e-07, "loss": 2.436, "step": 1184000 }, { "epoch": 4.99, "eval_loss": 2.286545515060425, "eval_runtime": 243.7626, "eval_samples_per_second": 820.471, "eval_steps_per_second": 51.279, "step": 1184000 }, { "epoch": 5.02, "eval_loss": 2.281639337539673, "eval_runtime": 242.7206, "eval_samples_per_second": 823.993, "eval_steps_per_second": 51.5, "step": 1192000 }, { "epoch": 5.05, "learning_rate": 2.05e-07, "loss": 2.4329, "step": 1200000 }, { "epoch": 5.05, "eval_loss": 2.2862467765808105, "eval_runtime": 242.9508, "eval_samples_per_second": 823.212, "eval_steps_per_second": 51.451, "step": 1200000 }, { "epoch": 5.09, "eval_loss": 2.284660577774048, "eval_runtime": 244.035, "eval_samples_per_second": 819.554, "eval_steps_per_second": 51.222, "step": 1208000 }, { "epoch": 5.12, "learning_rate": 2.0226666666666668e-07, "loss": 2.4276, "step": 1216000 }, { "epoch": 5.12, "eval_loss": 2.295132875442505, "eval_runtime": 243.547, "eval_samples_per_second": 821.197, "eval_steps_per_second": 51.325, "step": 1216000 }, { "epoch": 5.15, "eval_loss": 2.297971487045288, "eval_runtime": 243.0459, "eval_samples_per_second": 822.89, "eval_steps_per_second": 51.431, "step": 1224000 }, { "epoch": 5.19, "learning_rate": 1.9953333333333333e-07, "loss": 2.4362, "step": 1232000 }, { "epoch": 5.19, "eval_loss": 2.2889420986175537, "eval_runtime": 245.3823, "eval_samples_per_second": 815.055, "eval_steps_per_second": 50.941, "step": 1232000 }, { "epoch": 5.22, "eval_loss": 2.2913663387298584, "eval_runtime": 243.0295, "eval_samples_per_second": 822.945, "eval_steps_per_second": 51.434, "step": 1240000 }, { "epoch": 5.25, "learning_rate": 1.968e-07, "loss": 2.4309, "step": 1248000 }, { "epoch": 5.25, "eval_loss": 2.2914836406707764, "eval_runtime": 245.1791, "eval_samples_per_second": 815.73, "eval_steps_per_second": 50.983, "step": 1248000 }, { "epoch": 5.29, "eval_loss": 2.2822399139404297, "eval_runtime": 243.1242, "eval_samples_per_second": 822.625, "eval_steps_per_second": 51.414, "step": 1256000 }, { "epoch": 5.32, "learning_rate": 1.9406666666666667e-07, "loss": 2.4414, "step": 1264000 }, { "epoch": 5.32, "eval_loss": 2.287137269973755, "eval_runtime": 243.2167, "eval_samples_per_second": 822.312, "eval_steps_per_second": 51.395, "step": 1264000 }, { "epoch": 5.36, "eval_loss": 2.2890405654907227, "eval_runtime": 246.4538, "eval_samples_per_second": 811.511, "eval_steps_per_second": 50.719, "step": 1272000 }, { "epoch": 5.39, "learning_rate": 1.9133333333333333e-07, "loss": 2.4241, "step": 1280000 }, { "epoch": 5.39, "eval_loss": 2.2844035625457764, "eval_runtime": 243.8151, "eval_samples_per_second": 820.294, "eval_steps_per_second": 51.268, "step": 1280000 }, { "epoch": 5.42, "eval_loss": 2.281158924102783, "eval_runtime": 244.4232, "eval_samples_per_second": 818.253, "eval_steps_per_second": 51.141, "step": 1288000 }, { "epoch": 5.46, "learning_rate": 1.886e-07, "loss": 2.4251, "step": 1296000 }, { "epoch": 5.46, "eval_loss": 2.2873995304107666, "eval_runtime": 245.6666, "eval_samples_per_second": 814.111, "eval_steps_per_second": 50.882, "step": 1296000 }, { "epoch": 5.49, "eval_loss": 2.2846264839172363, "eval_runtime": 244.5062, "eval_samples_per_second": 817.975, "eval_steps_per_second": 51.123, "step": 1304000 }, { "epoch": 5.52, "learning_rate": 1.8586666666666666e-07, "loss": 2.4318, "step": 1312000 }, { "epoch": 5.52, "eval_loss": 2.283116340637207, "eval_runtime": 245.4301, "eval_samples_per_second": 814.896, "eval_steps_per_second": 50.931, "step": 1312000 }, { "epoch": 5.56, "eval_loss": 2.2895309925079346, "eval_runtime": 244.3142, "eval_samples_per_second": 818.618, "eval_steps_per_second": 51.164, "step": 1320000 }, { "epoch": 5.59, "learning_rate": 1.8313333333333332e-07, "loss": 2.4247, "step": 1328000 }, { "epoch": 5.59, "eval_loss": 2.2795751094818115, "eval_runtime": 246.3547, "eval_samples_per_second": 811.838, "eval_steps_per_second": 50.74, "step": 1328000 }, { "epoch": 5.63, "eval_loss": 2.2833728790283203, "eval_runtime": 244.3038, "eval_samples_per_second": 818.653, "eval_steps_per_second": 51.166, "step": 1336000 }, { "epoch": 5.66, "learning_rate": 1.804e-07, "loss": 2.4305, "step": 1344000 }, { "epoch": 5.66, "eval_loss": 2.2810542583465576, "eval_runtime": 245.8027, "eval_samples_per_second": 813.661, "eval_steps_per_second": 50.854, "step": 1344000 }, { "epoch": 5.69, "eval_loss": 2.2922263145446777, "eval_runtime": 244.3162, "eval_samples_per_second": 818.611, "eval_steps_per_second": 51.163, "step": 1352000 }, { "epoch": 5.73, "learning_rate": 1.7766666666666666e-07, "loss": 2.4336, "step": 1360000 }, { "epoch": 5.73, "eval_loss": 2.2830028533935547, "eval_runtime": 245.6363, "eval_samples_per_second": 814.212, "eval_steps_per_second": 50.888, "step": 1360000 }, { "epoch": 5.76, "eval_loss": 2.2903530597686768, "eval_runtime": 244.3808, "eval_samples_per_second": 818.395, "eval_steps_per_second": 51.15, "step": 1368000 }, { "epoch": 5.79, "learning_rate": 1.7493333333333334e-07, "loss": 2.428, "step": 1376000 }, { "epoch": 5.79, "eval_loss": 2.284269332885742, "eval_runtime": 243.8962, "eval_samples_per_second": 820.021, "eval_steps_per_second": 51.251, "step": 1376000 }, { "epoch": 5.83, "eval_loss": 2.2804477214813232, "eval_runtime": 243.7523, "eval_samples_per_second": 820.505, "eval_steps_per_second": 51.282, "step": 1384000 }, { "epoch": 5.86, "learning_rate": 1.722e-07, "loss": 2.4254, "step": 1392000 }, { "epoch": 5.86, "eval_loss": 2.285198211669922, "eval_runtime": 244.494, "eval_samples_per_second": 818.016, "eval_steps_per_second": 51.126, "step": 1392000 }, { "epoch": 5.89, "eval_loss": 2.285767078399658, "eval_runtime": 245.3316, "eval_samples_per_second": 815.223, "eval_steps_per_second": 50.951, "step": 1400000 }, { "epoch": 5.93, "learning_rate": 1.6946666666666668e-07, "loss": 2.4287, "step": 1408000 }, { "epoch": 5.93, "eval_loss": 2.2921693325042725, "eval_runtime": 246.0396, "eval_samples_per_second": 812.877, "eval_steps_per_second": 50.805, "step": 1408000 }, { "epoch": 5.96, "eval_loss": 2.2846784591674805, "eval_runtime": 243.9013, "eval_samples_per_second": 820.004, "eval_steps_per_second": 51.25, "step": 1416000 }, { "epoch": 6.0, "learning_rate": 1.6673333333333333e-07, "loss": 2.4291, "step": 1424000 }, { "epoch": 6.0, "eval_loss": 2.285550117492676, "eval_runtime": 244.514, "eval_samples_per_second": 817.949, "eval_steps_per_second": 51.122, "step": 1424000 }, { "epoch": 6.03, "eval_loss": 2.287602186203003, "eval_runtime": 245.7265, "eval_samples_per_second": 813.913, "eval_steps_per_second": 50.87, "step": 1432000 }, { "epoch": 6.06, "learning_rate": 1.64e-07, "loss": 2.4289, "step": 1440000 }, { "epoch": 6.06, "eval_loss": 2.282216787338257, "eval_runtime": 247.3389, "eval_samples_per_second": 808.607, "eval_steps_per_second": 50.538, "step": 1440000 }, { "epoch": 6.1, "eval_loss": 2.2787294387817383, "eval_runtime": 247.7887, "eval_samples_per_second": 807.139, "eval_steps_per_second": 50.446, "step": 1448000 }, { "epoch": 6.13, "learning_rate": 1.6126666666666667e-07, "loss": 2.4272, "step": 1456000 }, { "epoch": 6.13, "eval_loss": 2.2810943126678467, "eval_runtime": 244.5008, "eval_samples_per_second": 817.993, "eval_steps_per_second": 51.125, "step": 1456000 }, { "epoch": 6.16, "eval_loss": 2.285348415374756, "eval_runtime": 246.9617, "eval_samples_per_second": 809.842, "eval_steps_per_second": 50.615, "step": 1464000 }, { "epoch": 6.2, "learning_rate": 1.5853333333333332e-07, "loss": 2.4267, "step": 1472000 }, { "epoch": 6.2, "eval_loss": 2.2818119525909424, "eval_runtime": 245.9708, "eval_samples_per_second": 813.105, "eval_steps_per_second": 50.819, "step": 1472000 }, { "epoch": 6.23, "eval_loss": 2.2764692306518555, "eval_runtime": 249.5295, "eval_samples_per_second": 801.509, "eval_steps_per_second": 50.094, "step": 1480000 }, { "epoch": 6.27, "learning_rate": 1.558e-07, "loss": 2.4237, "step": 1488000 }, { "epoch": 6.27, "eval_loss": 2.2790732383728027, "eval_runtime": 247.115, "eval_samples_per_second": 809.34, "eval_steps_per_second": 50.584, "step": 1488000 }, { "epoch": 6.3, "eval_loss": 2.2768290042877197, "eval_runtime": 246.6491, "eval_samples_per_second": 810.868, "eval_steps_per_second": 50.679, "step": 1496000 }, { "epoch": 6.33, "learning_rate": 1.5306666666666666e-07, "loss": 2.4277, "step": 1504000 }, { "epoch": 6.33, "eval_loss": 2.286600351333618, "eval_runtime": 247.5007, "eval_samples_per_second": 808.078, "eval_steps_per_second": 50.505, "step": 1504000 }, { "epoch": 6.37, "eval_loss": 2.282073974609375, "eval_runtime": 245.5734, "eval_samples_per_second": 814.42, "eval_steps_per_second": 50.901, "step": 1512000 }, { "epoch": 6.4, "learning_rate": 1.5033333333333332e-07, "loss": 2.4316, "step": 1520000 }, { "epoch": 6.4, "eval_loss": 2.285618305206299, "eval_runtime": 245.7388, "eval_samples_per_second": 813.872, "eval_steps_per_second": 50.867, "step": 1520000 }, { "epoch": 6.43, "eval_loss": 2.2820215225219727, "eval_runtime": 247.4957, "eval_samples_per_second": 808.095, "eval_steps_per_second": 50.506, "step": 1528000 }, { "epoch": 6.47, "learning_rate": 1.476e-07, "loss": 2.4222, "step": 1536000 }, { "epoch": 6.47, "eval_loss": 2.2891499996185303, "eval_runtime": 245.6614, "eval_samples_per_second": 814.129, "eval_steps_per_second": 50.883, "step": 1536000 }, { "epoch": 6.5, "eval_loss": 2.2802817821502686, "eval_runtime": 246.4523, "eval_samples_per_second": 811.516, "eval_steps_per_second": 50.72, "step": 1544000 }, { "epoch": 6.53, "learning_rate": 1.4486666666666665e-07, "loss": 2.426, "step": 1552000 }, { "epoch": 6.53, "eval_loss": 2.279686689376831, "eval_runtime": 247.3876, "eval_samples_per_second": 808.448, "eval_steps_per_second": 50.528, "step": 1552000 }, { "epoch": 6.57, "eval_loss": 2.284360408782959, "eval_runtime": 246.8856, "eval_samples_per_second": 810.092, "eval_steps_per_second": 50.631, "step": 1560000 }, { "epoch": 6.6, "learning_rate": 1.4213333333333334e-07, "loss": 2.422, "step": 1568000 }, { "epoch": 6.6, "eval_loss": 2.287179708480835, "eval_runtime": 245.0018, "eval_samples_per_second": 816.321, "eval_steps_per_second": 51.02, "step": 1568000 }, { "epoch": 6.64, "eval_loss": 2.2903947830200195, "eval_runtime": 244.8984, "eval_samples_per_second": 816.665, "eval_steps_per_second": 51.042, "step": 1576000 }, { "epoch": 6.67, "learning_rate": 1.3940000000000002e-07, "loss": 2.4323, "step": 1584000 }, { "epoch": 6.67, "eval_loss": 2.2796852588653564, "eval_runtime": 245.135, "eval_samples_per_second": 815.877, "eval_steps_per_second": 50.992, "step": 1584000 }, { "epoch": 6.7, "eval_loss": 2.275681495666504, "eval_runtime": 247.5616, "eval_samples_per_second": 807.88, "eval_steps_per_second": 50.492, "step": 1592000 }, { "epoch": 6.74, "learning_rate": 1.3666666666666665e-07, "loss": 2.4315, "step": 1600000 }, { "epoch": 6.74, "eval_loss": 2.287421703338623, "eval_runtime": 247.5879, "eval_samples_per_second": 807.794, "eval_steps_per_second": 50.487, "step": 1600000 }, { "epoch": 6.77, "eval_loss": 2.276327133178711, "eval_runtime": 245.1196, "eval_samples_per_second": 815.928, "eval_steps_per_second": 50.996, "step": 1608000 }, { "epoch": 6.8, "learning_rate": 1.3393333333333333e-07, "loss": 2.421, "step": 1616000 }, { "epoch": 6.8, "eval_loss": 2.2857308387756348, "eval_runtime": 245.1941, "eval_samples_per_second": 815.68, "eval_steps_per_second": 50.98, "step": 1616000 }, { "epoch": 6.84, "eval_loss": 2.280374765396118, "eval_runtime": 247.8166, "eval_samples_per_second": 807.048, "eval_steps_per_second": 50.441, "step": 1624000 }, { "epoch": 6.87, "learning_rate": 1.312e-07, "loss": 2.4299, "step": 1632000 }, { "epoch": 6.87, "eval_loss": 2.282515287399292, "eval_runtime": 245.4773, "eval_samples_per_second": 814.739, "eval_steps_per_second": 50.921, "step": 1632000 }, { "epoch": 6.91, "eval_loss": 2.2819290161132812, "eval_runtime": 247.6539, "eval_samples_per_second": 807.579, "eval_steps_per_second": 50.474, "step": 1640000 }, { "epoch": 6.94, "learning_rate": 1.2846666666666667e-07, "loss": 2.4289, "step": 1648000 }, { "epoch": 6.94, "eval_loss": 2.28240704536438, "eval_runtime": 245.8148, "eval_samples_per_second": 813.621, "eval_steps_per_second": 50.851, "step": 1648000 }, { "epoch": 6.97, "eval_loss": 2.282073974609375, "eval_runtime": 247.9192, "eval_samples_per_second": 806.714, "eval_steps_per_second": 50.42, "step": 1656000 }, { "epoch": 7.01, "learning_rate": 1.2573333333333332e-07, "loss": 2.4257, "step": 1664000 }, { "epoch": 7.01, "eval_loss": 2.2802059650421143, "eval_runtime": 248.4226, "eval_samples_per_second": 805.08, "eval_steps_per_second": 50.317, "step": 1664000 }, { "epoch": 7.04, "eval_loss": 2.2760419845581055, "eval_runtime": 246.2143, "eval_samples_per_second": 812.3, "eval_steps_per_second": 50.769, "step": 1672000 }, { "epoch": 7.07, "learning_rate": 1.23e-07, "loss": 2.4227, "step": 1680000 }, { "epoch": 7.07, "eval_loss": 2.28104305267334, "eval_runtime": 247.8478, "eval_samples_per_second": 806.947, "eval_steps_per_second": 50.434, "step": 1680000 }, { "epoch": 7.11, "eval_loss": 2.2776732444763184, "eval_runtime": 245.7431, "eval_samples_per_second": 813.858, "eval_steps_per_second": 50.866, "step": 1688000 }, { "epoch": 7.14, "learning_rate": 1.2026666666666666e-07, "loss": 2.4287, "step": 1696000 }, { "epoch": 7.14, "eval_loss": 2.277235984802246, "eval_runtime": 245.5934, "eval_samples_per_second": 814.354, "eval_steps_per_second": 50.897, "step": 1696000 }, { "epoch": 7.17, "eval_loss": 2.2786269187927246, "eval_runtime": 246.4934, "eval_samples_per_second": 811.381, "eval_steps_per_second": 50.711, "step": 1704000 }, { "epoch": 7.21, "learning_rate": 1.1753333333333334e-07, "loss": 2.4227, "step": 1712000 }, { "epoch": 7.21, "eval_loss": 2.285905361175537, "eval_runtime": 246.4981, "eval_samples_per_second": 811.365, "eval_steps_per_second": 50.71, "step": 1712000 }, { "epoch": 7.24, "eval_loss": 2.2862019538879395, "eval_runtime": 247.148, "eval_samples_per_second": 809.232, "eval_steps_per_second": 50.577, "step": 1720000 }, { "epoch": 7.28, "learning_rate": 1.1480000000000001e-07, "loss": 2.4262, "step": 1728000 }, { "epoch": 7.28, "eval_loss": 2.278900146484375, "eval_runtime": 246.4039, "eval_samples_per_second": 811.675, "eval_steps_per_second": 50.73, "step": 1728000 }, { "epoch": 7.31, "eval_loss": 2.2848124504089355, "eval_runtime": 246.3089, "eval_samples_per_second": 811.988, "eval_steps_per_second": 50.749, "step": 1736000 }, { "epoch": 7.34, "learning_rate": 1.1206666666666666e-07, "loss": 2.4263, "step": 1744000 }, { "epoch": 7.34, "eval_loss": 2.275432586669922, "eval_runtime": 246.8456, "eval_samples_per_second": 810.223, "eval_steps_per_second": 50.639, "step": 1744000 }, { "epoch": 7.38, "eval_loss": 2.2777817249298096, "eval_runtime": 247.2848, "eval_samples_per_second": 808.784, "eval_steps_per_second": 50.549, "step": 1752000 }, { "epoch": 7.41, "learning_rate": 1.0933333333333333e-07, "loss": 2.4246, "step": 1760000 }, { "epoch": 7.41, "eval_loss": 2.27353572845459, "eval_runtime": 247.6065, "eval_samples_per_second": 807.733, "eval_steps_per_second": 50.483, "step": 1760000 }, { "epoch": 7.44, "eval_loss": 2.2827255725860596, "eval_runtime": 246.5965, "eval_samples_per_second": 811.041, "eval_steps_per_second": 50.69, "step": 1768000 }, { "epoch": 7.48, "learning_rate": 1.066e-07, "loss": 2.4147, "step": 1776000 }, { "epoch": 7.48, "eval_loss": 2.285012722015381, "eval_runtime": 246.8181, "eval_samples_per_second": 810.313, "eval_steps_per_second": 50.645, "step": 1776000 }, { "epoch": 7.51, "eval_loss": 2.282083511352539, "eval_runtime": 246.9583, "eval_samples_per_second": 809.853, "eval_steps_per_second": 50.616, "step": 1784000 }, { "epoch": 7.55, "learning_rate": 1.0386666666666667e-07, "loss": 2.4288, "step": 1792000 }, { "epoch": 7.55, "eval_loss": 2.280324935913086, "eval_runtime": 246.1706, "eval_samples_per_second": 812.445, "eval_steps_per_second": 50.778, "step": 1792000 }, { "epoch": 7.58, "eval_loss": 2.2760393619537354, "eval_runtime": 246.0656, "eval_samples_per_second": 812.791, "eval_steps_per_second": 50.799, "step": 1800000 }, { "epoch": 7.61, "learning_rate": 1.0113333333333334e-07, "loss": 2.4231, "step": 1808000 }, { "epoch": 7.61, "eval_loss": 2.2748613357543945, "eval_runtime": 246.3683, "eval_samples_per_second": 811.793, "eval_steps_per_second": 50.737, "step": 1808000 }, { "epoch": 7.65, "eval_loss": 2.2749080657958984, "eval_runtime": 246.3319, "eval_samples_per_second": 811.913, "eval_steps_per_second": 50.745, "step": 1816000 }, { "epoch": 7.68, "learning_rate": 9.84e-08, "loss": 2.4243, "step": 1824000 }, { "epoch": 7.68, "eval_loss": 2.274254322052002, "eval_runtime": 245.7767, "eval_samples_per_second": 813.747, "eval_steps_per_second": 50.859, "step": 1824000 }, { "epoch": 7.71, "eval_loss": 2.2792415618896484, "eval_runtime": 247.2515, "eval_samples_per_second": 808.893, "eval_steps_per_second": 50.556, "step": 1832000 }, { "epoch": 7.75, "learning_rate": 9.566666666666666e-08, "loss": 2.4215, "step": 1840000 }, { "epoch": 7.75, "eval_loss": 2.275153398513794, "eval_runtime": 247.4479, "eval_samples_per_second": 808.251, "eval_steps_per_second": 50.516, "step": 1840000 }, { "epoch": 7.78, "eval_loss": 2.2769758701324463, "eval_runtime": 247.8276, "eval_samples_per_second": 807.013, "eval_steps_per_second": 50.438, "step": 1848000 }, { "epoch": 7.81, "learning_rate": 9.293333333333333e-08, "loss": 2.4213, "step": 1856000 }, { "epoch": 7.81, "eval_loss": 2.280165672302246, "eval_runtime": 246.5191, "eval_samples_per_second": 811.296, "eval_steps_per_second": 50.706, "step": 1856000 }, { "epoch": 7.85, "eval_loss": 2.27955961227417, "eval_runtime": 248.3255, "eval_samples_per_second": 805.394, "eval_steps_per_second": 50.337, "step": 1864000 }, { "epoch": 7.88, "learning_rate": 9.02e-08, "loss": 2.4236, "step": 1872000 }, { "epoch": 7.88, "eval_loss": 2.2882931232452393, "eval_runtime": 246.4462, "eval_samples_per_second": 811.536, "eval_steps_per_second": 50.721, "step": 1872000 }, { "epoch": 7.92, "eval_loss": 2.279242515563965, "eval_runtime": 246.5281, "eval_samples_per_second": 811.266, "eval_steps_per_second": 50.704, "step": 1880000 }, { "epoch": 7.95, "learning_rate": 8.746666666666667e-08, "loss": 2.4237, "step": 1888000 }, { "epoch": 7.95, "eval_loss": 2.272642135620117, "eval_runtime": 247.5094, "eval_samples_per_second": 808.05, "eval_steps_per_second": 50.503, "step": 1888000 }, { "epoch": 7.98, "eval_loss": 2.2816178798675537, "eval_runtime": 248.3802, "eval_samples_per_second": 805.217, "eval_steps_per_second": 50.326, "step": 1896000 }, { "epoch": 8.02, "learning_rate": 8.473333333333334e-08, "loss": 2.4183, "step": 1904000 }, { "epoch": 8.02, "eval_loss": 2.2790186405181885, "eval_runtime": 247.7385, "eval_samples_per_second": 807.303, "eval_steps_per_second": 50.456, "step": 1904000 }, { "epoch": 8.05, "eval_loss": 2.2814745903015137, "eval_runtime": 248.3044, "eval_samples_per_second": 805.463, "eval_steps_per_second": 50.341, "step": 1912000 }, { "epoch": 8.08, "learning_rate": 8.2e-08, "loss": 2.4215, "step": 1920000 }, { "epoch": 8.08, "eval_loss": 2.277374744415283, "eval_runtime": 246.8648, "eval_samples_per_second": 810.16, "eval_steps_per_second": 50.635, "step": 1920000 }, { "epoch": 8.12, "eval_loss": 2.270047664642334, "eval_runtime": 246.8413, "eval_samples_per_second": 810.237, "eval_steps_per_second": 50.64, "step": 1928000 }, { "epoch": 8.15, "learning_rate": 7.926666666666666e-08, "loss": 2.4258, "step": 1936000 }, { "epoch": 8.15, "eval_loss": 2.276344060897827, "eval_runtime": 246.9748, "eval_samples_per_second": 809.799, "eval_steps_per_second": 50.612, "step": 1936000 }, { "epoch": 8.19, "eval_loss": 2.278593063354492, "eval_runtime": 248.7283, "eval_samples_per_second": 804.09, "eval_steps_per_second": 50.256, "step": 1944000 }, { "epoch": 8.22, "learning_rate": 7.653333333333333e-08, "loss": 2.4209, "step": 1952000 }, { "epoch": 8.22, "eval_loss": 2.2763326168060303, "eval_runtime": 247.5692, "eval_samples_per_second": 807.855, "eval_steps_per_second": 50.491, "step": 1952000 }, { "epoch": 8.25, "eval_loss": 2.278881072998047, "eval_runtime": 247.8151, "eval_samples_per_second": 807.053, "eval_steps_per_second": 50.441, "step": 1960000 }, { "epoch": 8.29, "learning_rate": 7.38e-08, "loss": 2.4217, "step": 1968000 }, { "epoch": 8.29, "eval_loss": 2.2783920764923096, "eval_runtime": 248.9593, "eval_samples_per_second": 803.344, "eval_steps_per_second": 50.209, "step": 1968000 }, { "epoch": 8.32, "eval_loss": 2.2773079872131348, "eval_runtime": 247.6818, "eval_samples_per_second": 807.488, "eval_steps_per_second": 50.468, "step": 1976000 }, { "epoch": 8.35, "learning_rate": 7.106666666666667e-08, "loss": 2.4279, "step": 1984000 }, { "epoch": 8.35, "eval_loss": 2.286058187484741, "eval_runtime": 247.013, "eval_samples_per_second": 809.674, "eval_steps_per_second": 50.605, "step": 1984000 }, { "epoch": 8.39, "eval_loss": 2.2728214263916016, "eval_runtime": 247.152, "eval_samples_per_second": 809.219, "eval_steps_per_second": 50.576, "step": 1992000 }, { "epoch": 8.42, "learning_rate": 6.833333333333332e-08, "loss": 2.4268, "step": 2000000 }, { "epoch": 8.42, "eval_loss": 2.276221513748169, "eval_runtime": 247.3718, "eval_samples_per_second": 808.5, "eval_steps_per_second": 50.531, "step": 2000000 }, { "epoch": 8.45, "eval_loss": 2.2788944244384766, "eval_runtime": 247.8322, "eval_samples_per_second": 806.998, "eval_steps_per_second": 50.437, "step": 2008000 }, { "epoch": 8.49, "learning_rate": 6.56e-08, "loss": 2.4177, "step": 2016000 }, { "epoch": 8.49, "eval_loss": 2.282210350036621, "eval_runtime": 247.9565, "eval_samples_per_second": 806.593, "eval_steps_per_second": 50.412, "step": 2016000 }, { "epoch": 8.52, "eval_loss": 2.275873899459839, "eval_runtime": 247.5408, "eval_samples_per_second": 807.948, "eval_steps_per_second": 50.497, "step": 2024000 }, { "epoch": 8.56, "learning_rate": 6.286666666666666e-08, "loss": 2.4166, "step": 2032000 }, { "epoch": 8.56, "eval_loss": 2.2791593074798584, "eval_runtime": 248.9215, "eval_samples_per_second": 803.466, "eval_steps_per_second": 50.217, "step": 2032000 }, { "epoch": 8.59, "eval_loss": 2.272076368331909, "eval_runtime": 248.0422, "eval_samples_per_second": 806.315, "eval_steps_per_second": 50.395, "step": 2040000 }, { "epoch": 8.62, "learning_rate": 6.013333333333333e-08, "loss": 2.4223, "step": 2048000 }, { "epoch": 8.62, "eval_loss": 2.2768054008483887, "eval_runtime": 247.9612, "eval_samples_per_second": 806.578, "eval_steps_per_second": 50.411, "step": 2048000 }, { "epoch": 8.66, "eval_loss": 2.2726008892059326, "eval_runtime": 247.7176, "eval_samples_per_second": 807.371, "eval_steps_per_second": 50.461, "step": 2056000 }, { "epoch": 8.69, "learning_rate": 5.7400000000000004e-08, "loss": 2.4139, "step": 2064000 }, { "epoch": 8.69, "eval_loss": 2.282505512237549, "eval_runtime": 247.6026, "eval_samples_per_second": 807.746, "eval_steps_per_second": 50.484, "step": 2064000 }, { "epoch": 8.72, "eval_loss": 2.2738897800445557, "eval_runtime": 248.4813, "eval_samples_per_second": 804.89, "eval_steps_per_second": 50.306, "step": 2072000 }, { "epoch": 8.76, "learning_rate": 5.4666666666666666e-08, "loss": 2.4236, "step": 2080000 }, { "epoch": 8.76, "eval_loss": 2.2834410667419434, "eval_runtime": 248.3802, "eval_samples_per_second": 805.217, "eval_steps_per_second": 50.326, "step": 2080000 }, { "epoch": 8.79, "eval_loss": 2.2750093936920166, "eval_runtime": 247.5076, "eval_samples_per_second": 808.056, "eval_steps_per_second": 50.503, "step": 2088000 }, { "epoch": 8.83, "learning_rate": 5.1933333333333335e-08, "loss": 2.4235, "step": 2096000 }, { "epoch": 8.83, "eval_loss": 2.275216817855835, "eval_runtime": 248.1106, "eval_samples_per_second": 806.092, "eval_steps_per_second": 50.381, "step": 2096000 }, { "epoch": 8.86, "eval_loss": 2.28031587600708, "eval_runtime": 247.6614, "eval_samples_per_second": 807.554, "eval_steps_per_second": 50.472, "step": 2104000 }, { "epoch": 8.89, "learning_rate": 4.92e-08, "loss": 2.4193, "step": 2112000 }, { "epoch": 8.89, "eval_loss": 2.2762908935546875, "eval_runtime": 247.8319, "eval_samples_per_second": 806.999, "eval_steps_per_second": 50.437, "step": 2112000 }, { "epoch": 8.93, "eval_loss": 2.2754592895507812, "eval_runtime": 247.5134, "eval_samples_per_second": 808.037, "eval_steps_per_second": 50.502, "step": 2120000 }, { "epoch": 8.96, "learning_rate": 4.6466666666666666e-08, "loss": 2.4179, "step": 2128000 }, { "epoch": 8.96, "eval_loss": 2.279421091079712, "eval_runtime": 248.0477, "eval_samples_per_second": 806.297, "eval_steps_per_second": 50.394, "step": 2128000 }, { "epoch": 8.99, "eval_loss": 2.2710611820220947, "eval_runtime": 247.3731, "eval_samples_per_second": 808.495, "eval_steps_per_second": 50.531, "step": 2136000 }, { "epoch": 9.03, "learning_rate": 4.3733333333333335e-08, "loss": 2.4181, "step": 2144000 }, { "epoch": 9.03, "eval_loss": 2.279233932495117, "eval_runtime": 248.7527, "eval_samples_per_second": 804.011, "eval_steps_per_second": 50.251, "step": 2144000 }, { "epoch": 9.06, "eval_loss": 2.275193691253662, "eval_runtime": 248.241, "eval_samples_per_second": 805.669, "eval_steps_per_second": 50.354, "step": 2152000 }, { "epoch": 9.09, "learning_rate": 4.1e-08, "loss": 2.4173, "step": 2160000 }, { "epoch": 9.09, "eval_loss": 2.2775352001190186, "eval_runtime": 248.2377, "eval_samples_per_second": 805.679, "eval_steps_per_second": 50.355, "step": 2160000 }, { "epoch": 9.13, "eval_loss": 2.2751924991607666, "eval_runtime": 248.4267, "eval_samples_per_second": 805.066, "eval_steps_per_second": 50.317, "step": 2168000 }, { "epoch": 9.16, "learning_rate": 3.8266666666666665e-08, "loss": 2.4242, "step": 2176000 }, { "epoch": 9.16, "eval_loss": 2.272923469543457, "eval_runtime": 248.2584, "eval_samples_per_second": 805.612, "eval_steps_per_second": 50.351, "step": 2176000 }, { "epoch": 9.2, "eval_loss": 2.2793116569519043, "eval_runtime": 248.8342, "eval_samples_per_second": 803.748, "eval_steps_per_second": 50.234, "step": 2184000 }, { "epoch": 9.23, "learning_rate": 3.5533333333333334e-08, "loss": 2.4166, "step": 2192000 }, { "epoch": 9.23, "eval_loss": 2.2719197273254395, "eval_runtime": 249.6006, "eval_samples_per_second": 801.28, "eval_steps_per_second": 50.08, "step": 2192000 }, { "epoch": 9.26, "eval_loss": 2.281975507736206, "eval_runtime": 248.1074, "eval_samples_per_second": 806.103, "eval_steps_per_second": 50.381, "step": 2200000 }, { "epoch": 9.3, "learning_rate": 3.28e-08, "loss": 2.4181, "step": 2208000 }, { "epoch": 9.3, "eval_loss": 2.2715933322906494, "eval_runtime": 249.4343, "eval_samples_per_second": 801.814, "eval_steps_per_second": 50.113, "step": 2208000 }, { "epoch": 9.33, "eval_loss": 2.285536050796509, "eval_runtime": 250.2462, "eval_samples_per_second": 799.213, "eval_steps_per_second": 49.951, "step": 2216000 }, { "epoch": 9.36, "learning_rate": 3.0066666666666665e-08, "loss": 2.4245, "step": 2224000 }, { "epoch": 9.36, "eval_loss": 2.2805118560791016, "eval_runtime": 248.8621, "eval_samples_per_second": 803.658, "eval_steps_per_second": 50.229, "step": 2224000 }, { "epoch": 9.4, "eval_loss": 2.272109270095825, "eval_runtime": 250.3664, "eval_samples_per_second": 798.829, "eval_steps_per_second": 49.927, "step": 2232000 }, { "epoch": 9.43, "learning_rate": 2.7333333333333333e-08, "loss": 2.4204, "step": 2240000 }, { "epoch": 9.43, "eval_loss": 2.270662546157837, "eval_runtime": 248.4336, "eval_samples_per_second": 805.044, "eval_steps_per_second": 50.315, "step": 2240000 }, { "epoch": 9.47, "eval_loss": 2.2766847610473633, "eval_runtime": 248.6132, "eval_samples_per_second": 804.462, "eval_steps_per_second": 50.279, "step": 2248000 }, { "epoch": 9.5, "learning_rate": 2.46e-08, "loss": 2.4255, "step": 2256000 }, { "epoch": 9.5, "eval_loss": 2.2710325717926025, "eval_runtime": 248.6879, "eval_samples_per_second": 804.221, "eval_steps_per_second": 50.264, "step": 2256000 }, { "epoch": 9.53, "eval_loss": 2.2814137935638428, "eval_runtime": 249.0563, "eval_samples_per_second": 803.031, "eval_steps_per_second": 50.189, "step": 2264000 }, { "epoch": 9.57, "learning_rate": 2.1866666666666667e-08, "loss": 2.4254, "step": 2272000 }, { "epoch": 9.57, "eval_loss": 2.274559259414673, "eval_runtime": 249.1887, "eval_samples_per_second": 802.605, "eval_steps_per_second": 50.163, "step": 2272000 }, { "epoch": 9.6, "eval_loss": 2.2766289710998535, "eval_runtime": 248.6168, "eval_samples_per_second": 804.451, "eval_steps_per_second": 50.278, "step": 2280000 }, { "epoch": 9.63, "learning_rate": 1.9133333333333333e-08, "loss": 2.4232, "step": 2288000 }, { "epoch": 9.63, "eval_loss": 2.2724950313568115, "eval_runtime": 248.9678, "eval_samples_per_second": 803.317, "eval_steps_per_second": 50.207, "step": 2288000 }, { "epoch": 9.67, "eval_loss": 2.2764828205108643, "eval_runtime": 248.7636, "eval_samples_per_second": 803.976, "eval_steps_per_second": 50.249, "step": 2296000 }, { "epoch": 9.7, "learning_rate": 1.64e-08, "loss": 2.4189, "step": 2304000 }, { "epoch": 9.7, "eval_loss": 2.275614023208618, "eval_runtime": 249.9482, "eval_samples_per_second": 800.166, "eval_steps_per_second": 50.01, "step": 2304000 }, { "epoch": 9.73, "eval_loss": 2.2767763137817383, "eval_runtime": 248.8725, "eval_samples_per_second": 803.624, "eval_steps_per_second": 50.227, "step": 2312000 }, { "epoch": 9.77, "learning_rate": 1.3666666666666667e-08, "loss": 2.4105, "step": 2320000 }, { "epoch": 9.77, "eval_loss": 2.280364751815796, "eval_runtime": 248.8287, "eval_samples_per_second": 803.766, "eval_steps_per_second": 50.235, "step": 2320000 }, { "epoch": 9.8, "eval_loss": 2.2873153686523438, "eval_runtime": 248.9747, "eval_samples_per_second": 803.295, "eval_steps_per_second": 50.206, "step": 2328000 }, { "epoch": 9.84, "learning_rate": 1.0933333333333334e-08, "loss": 2.415, "step": 2336000 }, { "epoch": 9.84, "eval_loss": 2.278338670730591, "eval_runtime": 248.9743, "eval_samples_per_second": 803.296, "eval_steps_per_second": 50.206, "step": 2336000 }, { "epoch": 9.87, "eval_loss": 2.2737488746643066, "eval_runtime": 250.6409, "eval_samples_per_second": 797.954, "eval_steps_per_second": 49.872, "step": 2344000 }, { "epoch": 9.9, "learning_rate": 8.2e-09, "loss": 2.4174, "step": 2352000 }, { "epoch": 9.9, "eval_loss": 2.278608798980713, "eval_runtime": 251.8413, "eval_samples_per_second": 794.151, "eval_steps_per_second": 49.634, "step": 2352000 }, { "epoch": 9.94, "eval_loss": 2.2729651927948, "eval_runtime": 250.8498, "eval_samples_per_second": 797.29, "eval_steps_per_second": 49.831, "step": 2360000 }, { "epoch": 9.97, "learning_rate": 5.466666666666667e-09, "loss": 2.4199, "step": 2368000 }, { "epoch": 9.97, "eval_loss": 2.2793538570404053, "eval_runtime": 253.8866, "eval_samples_per_second": 787.753, "eval_steps_per_second": 49.235, "step": 2368000 }, { "epoch": 10.0, "eval_loss": 2.284799337387085, "eval_runtime": 252.1889, "eval_samples_per_second": 793.056, "eval_steps_per_second": 49.566, "step": 2376000 }, { "epoch": 10.04, "learning_rate": 2.7333333333333334e-09, "loss": 2.4224, "step": 2384000 }, { "epoch": 10.04, "eval_loss": 2.2810616493225098, "eval_runtime": 251.6013, "eval_samples_per_second": 794.909, "eval_steps_per_second": 49.682, "step": 2384000 }, { "epoch": 10.07, "eval_loss": 2.281796932220459, "eval_runtime": 251.0825, "eval_samples_per_second": 796.551, "eval_steps_per_second": 49.784, "step": 2392000 }, { "epoch": 10.11, "learning_rate": 0.0, "loss": 2.4226, "step": 2400000 }, { "epoch": 10.11, "eval_loss": 2.279843330383301, "eval_runtime": 250.0989, "eval_samples_per_second": 799.684, "eval_steps_per_second": 49.98, "step": 2400000 }, { "epoch": 10.11, "step": 2400000, "total_flos": 8.368611666112401e+17, "train_loss": 2.4345372688802085, "train_runtime": 232385.3236, "train_samples_per_second": 165.243, "train_steps_per_second": 10.328 } ], "logging_steps": 16000, "max_steps": 2400000, "num_train_epochs": 11, "save_steps": 32000, "total_flos": 8.368611666112401e+17, "trial_name": null, "trial_params": null }