|
{ |
|
"best_metric": 2.270662546157837, |
|
"best_model_checkpoint": "./model_tweets_2020_Q4_50_rand/checkpoint-2240000", |
|
"epoch": 10.105220609597433, |
|
"eval_steps": 8000, |
|
"global_step": 2400000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.03, |
|
"eval_loss": 2.3816494941711426, |
|
"eval_runtime": 236.9637, |
|
"eval_samples_per_second": 844.011, |
|
"eval_steps_per_second": 52.751, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 4.0726666666666665e-07, |
|
"loss": 2.5089, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"eval_loss": 2.364738702774048, |
|
"eval_runtime": 237.3943, |
|
"eval_samples_per_second": 842.48, |
|
"eval_steps_per_second": 52.655, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_loss": 2.355194091796875, |
|
"eval_runtime": 237.9121, |
|
"eval_samples_per_second": 840.647, |
|
"eval_steps_per_second": 52.54, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.0453333333333336e-07, |
|
"loss": 2.4989, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"eval_loss": 2.3528289794921875, |
|
"eval_runtime": 238.6586, |
|
"eval_samples_per_second": 838.017, |
|
"eval_steps_per_second": 52.376, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"eval_loss": 2.3485841751098633, |
|
"eval_runtime": 239.5908, |
|
"eval_samples_per_second": 834.756, |
|
"eval_steps_per_second": 52.172, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.018e-07, |
|
"loss": 2.4836, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"eval_loss": 2.34626841545105, |
|
"eval_runtime": 238.7656, |
|
"eval_samples_per_second": 837.642, |
|
"eval_steps_per_second": 52.353, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"eval_loss": 2.3410613536834717, |
|
"eval_runtime": 240.0423, |
|
"eval_samples_per_second": 833.186, |
|
"eval_steps_per_second": 52.074, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 3.9906666666666667e-07, |
|
"loss": 2.4904, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"eval_loss": 2.3394079208374023, |
|
"eval_runtime": 239.0867, |
|
"eval_samples_per_second": 836.517, |
|
"eval_steps_per_second": 52.282, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"eval_loss": 2.3349857330322266, |
|
"eval_runtime": 238.1477, |
|
"eval_samples_per_second": 839.815, |
|
"eval_steps_per_second": 52.488, |
|
"step": 72000 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 3.963333333333333e-07, |
|
"loss": 2.4733, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"eval_loss": 2.330885410308838, |
|
"eval_runtime": 239.5566, |
|
"eval_samples_per_second": 834.876, |
|
"eval_steps_per_second": 52.18, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"eval_loss": 2.3288557529449463, |
|
"eval_runtime": 239.3525, |
|
"eval_samples_per_second": 835.588, |
|
"eval_steps_per_second": 52.224, |
|
"step": 88000 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3.936e-07, |
|
"loss": 2.4675, |
|
"step": 96000 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"eval_loss": 2.3381102085113525, |
|
"eval_runtime": 239.1998, |
|
"eval_samples_per_second": 836.121, |
|
"eval_steps_per_second": 52.258, |
|
"step": 96000 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"eval_loss": 2.331658363342285, |
|
"eval_runtime": 237.7084, |
|
"eval_samples_per_second": 841.367, |
|
"eval_steps_per_second": 52.585, |
|
"step": 104000 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 3.908666666666667e-07, |
|
"loss": 2.4762, |
|
"step": 112000 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"eval_loss": 2.3218166828155518, |
|
"eval_runtime": 239.0796, |
|
"eval_samples_per_second": 836.541, |
|
"eval_steps_per_second": 52.284, |
|
"step": 112000 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"eval_loss": 2.326749086380005, |
|
"eval_runtime": 239.0317, |
|
"eval_samples_per_second": 836.709, |
|
"eval_steps_per_second": 52.294, |
|
"step": 120000 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 3.8813333333333334e-07, |
|
"loss": 2.4616, |
|
"step": 128000 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"eval_loss": 2.3240551948547363, |
|
"eval_runtime": 238.0739, |
|
"eval_samples_per_second": 840.075, |
|
"eval_steps_per_second": 52.505, |
|
"step": 128000 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"eval_loss": 2.3280670642852783, |
|
"eval_runtime": 238.6608, |
|
"eval_samples_per_second": 838.009, |
|
"eval_steps_per_second": 52.376, |
|
"step": 136000 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 3.854e-07, |
|
"loss": 2.4601, |
|
"step": 144000 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"eval_loss": 2.315213680267334, |
|
"eval_runtime": 238.6484, |
|
"eval_samples_per_second": 838.053, |
|
"eval_steps_per_second": 52.378, |
|
"step": 144000 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"eval_loss": 2.3242688179016113, |
|
"eval_runtime": 239.0187, |
|
"eval_samples_per_second": 836.755, |
|
"eval_steps_per_second": 52.297, |
|
"step": 152000 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 3.8266666666666665e-07, |
|
"loss": 2.4563, |
|
"step": 160000 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"eval_loss": 2.3201961517333984, |
|
"eval_runtime": 240.1294, |
|
"eval_samples_per_second": 832.884, |
|
"eval_steps_per_second": 52.055, |
|
"step": 160000 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"eval_loss": 2.3193206787109375, |
|
"eval_runtime": 241.1825, |
|
"eval_samples_per_second": 829.247, |
|
"eval_steps_per_second": 51.828, |
|
"step": 168000 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 3.799333333333333e-07, |
|
"loss": 2.459, |
|
"step": 176000 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"eval_loss": 2.318166732788086, |
|
"eval_runtime": 238.5889, |
|
"eval_samples_per_second": 838.262, |
|
"eval_steps_per_second": 52.391, |
|
"step": 176000 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"eval_loss": 2.324726104736328, |
|
"eval_runtime": 238.4567, |
|
"eval_samples_per_second": 838.727, |
|
"eval_steps_per_second": 52.42, |
|
"step": 184000 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 3.772e-07, |
|
"loss": 2.4639, |
|
"step": 192000 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"eval_loss": 2.320105791091919, |
|
"eval_runtime": 238.6268, |
|
"eval_samples_per_second": 838.129, |
|
"eval_steps_per_second": 52.383, |
|
"step": 192000 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"eval_loss": 2.3243095874786377, |
|
"eval_runtime": 239.9922, |
|
"eval_samples_per_second": 833.36, |
|
"eval_steps_per_second": 52.085, |
|
"step": 200000 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 3.7446666666666667e-07, |
|
"loss": 2.4561, |
|
"step": 208000 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"eval_loss": 2.321760416030884, |
|
"eval_runtime": 239.3254, |
|
"eval_samples_per_second": 835.682, |
|
"eval_steps_per_second": 52.23, |
|
"step": 208000 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"eval_loss": 2.3137271404266357, |
|
"eval_runtime": 240.8808, |
|
"eval_samples_per_second": 830.286, |
|
"eval_steps_per_second": 51.893, |
|
"step": 216000 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 3.7173333333333333e-07, |
|
"loss": 2.4556, |
|
"step": 224000 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"eval_loss": 2.318033456802368, |
|
"eval_runtime": 239.3207, |
|
"eval_samples_per_second": 835.699, |
|
"eval_steps_per_second": 52.231, |
|
"step": 224000 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"eval_loss": 2.314727783203125, |
|
"eval_runtime": 238.4213, |
|
"eval_samples_per_second": 838.851, |
|
"eval_steps_per_second": 52.428, |
|
"step": 232000 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 3.69e-07, |
|
"loss": 2.4573, |
|
"step": 240000 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"eval_loss": 2.309976100921631, |
|
"eval_runtime": 238.672, |
|
"eval_samples_per_second": 837.97, |
|
"eval_steps_per_second": 52.373, |
|
"step": 240000 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"eval_loss": 2.311811923980713, |
|
"eval_runtime": 239.3065, |
|
"eval_samples_per_second": 835.748, |
|
"eval_steps_per_second": 52.234, |
|
"step": 248000 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 3.6626666666666664e-07, |
|
"loss": 2.4516, |
|
"step": 256000 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"eval_loss": 2.315763235092163, |
|
"eval_runtime": 239.6224, |
|
"eval_samples_per_second": 834.646, |
|
"eval_steps_per_second": 52.165, |
|
"step": 256000 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"eval_loss": 2.3132565021514893, |
|
"eval_runtime": 241.721, |
|
"eval_samples_per_second": 827.4, |
|
"eval_steps_per_second": 51.713, |
|
"step": 264000 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 3.6353333333333335e-07, |
|
"loss": 2.4561, |
|
"step": 272000 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"eval_loss": 2.3065385818481445, |
|
"eval_runtime": 241.1529, |
|
"eval_samples_per_second": 829.349, |
|
"eval_steps_per_second": 51.834, |
|
"step": 272000 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"eval_loss": 2.318328857421875, |
|
"eval_runtime": 240.9143, |
|
"eval_samples_per_second": 830.171, |
|
"eval_steps_per_second": 51.886, |
|
"step": 280000 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 3.608e-07, |
|
"loss": 2.4476, |
|
"step": 288000 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"eval_loss": 2.310638666152954, |
|
"eval_runtime": 240.8865, |
|
"eval_samples_per_second": 830.267, |
|
"eval_steps_per_second": 51.892, |
|
"step": 288000 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"eval_loss": 2.3131144046783447, |
|
"eval_runtime": 238.8251, |
|
"eval_samples_per_second": 837.433, |
|
"eval_steps_per_second": 52.34, |
|
"step": 296000 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 3.5806666666666666e-07, |
|
"loss": 2.4503, |
|
"step": 304000 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"eval_loss": 2.3103673458099365, |
|
"eval_runtime": 238.9913, |
|
"eval_samples_per_second": 836.851, |
|
"eval_steps_per_second": 52.303, |
|
"step": 304000 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"eval_loss": 2.310051679611206, |
|
"eval_runtime": 239.4298, |
|
"eval_samples_per_second": 835.318, |
|
"eval_steps_per_second": 52.207, |
|
"step": 312000 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 3.553333333333333e-07, |
|
"loss": 2.4495, |
|
"step": 320000 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"eval_loss": 2.3085968494415283, |
|
"eval_runtime": 239.7653, |
|
"eval_samples_per_second": 834.149, |
|
"eval_steps_per_second": 52.134, |
|
"step": 320000 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"eval_loss": 2.3057291507720947, |
|
"eval_runtime": 240.7073, |
|
"eval_samples_per_second": 830.885, |
|
"eval_steps_per_second": 51.93, |
|
"step": 328000 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 3.5259999999999997e-07, |
|
"loss": 2.4534, |
|
"step": 336000 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"eval_loss": 2.3085904121398926, |
|
"eval_runtime": 240.303, |
|
"eval_samples_per_second": 832.282, |
|
"eval_steps_per_second": 52.018, |
|
"step": 336000 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"eval_loss": 2.309332847595215, |
|
"eval_runtime": 240.8676, |
|
"eval_samples_per_second": 830.332, |
|
"eval_steps_per_second": 51.896, |
|
"step": 344000 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 3.498666666666667e-07, |
|
"loss": 2.4486, |
|
"step": 352000 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"eval_loss": 2.3018343448638916, |
|
"eval_runtime": 241.3997, |
|
"eval_samples_per_second": 828.501, |
|
"eval_steps_per_second": 51.781, |
|
"step": 352000 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"eval_loss": 2.305995464324951, |
|
"eval_runtime": 239.2635, |
|
"eval_samples_per_second": 835.898, |
|
"eval_steps_per_second": 52.244, |
|
"step": 360000 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 3.4713333333333333e-07, |
|
"loss": 2.4457, |
|
"step": 368000 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"eval_loss": 2.3082966804504395, |
|
"eval_runtime": 239.5152, |
|
"eval_samples_per_second": 835.02, |
|
"eval_steps_per_second": 52.189, |
|
"step": 368000 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"eval_loss": 2.3109591007232666, |
|
"eval_runtime": 240.8072, |
|
"eval_samples_per_second": 830.54, |
|
"eval_steps_per_second": 51.909, |
|
"step": 376000 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 3.444e-07, |
|
"loss": 2.4443, |
|
"step": 384000 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"eval_loss": 2.297455310821533, |
|
"eval_runtime": 240.6668, |
|
"eval_samples_per_second": 831.024, |
|
"eval_steps_per_second": 51.939, |
|
"step": 384000 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"eval_loss": 2.300872564315796, |
|
"eval_runtime": 240.6097, |
|
"eval_samples_per_second": 831.222, |
|
"eval_steps_per_second": 51.951, |
|
"step": 392000 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 3.416666666666667e-07, |
|
"loss": 2.4405, |
|
"step": 400000 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"eval_loss": 2.3067097663879395, |
|
"eval_runtime": 240.2686, |
|
"eval_samples_per_second": 832.402, |
|
"eval_steps_per_second": 52.025, |
|
"step": 400000 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"eval_loss": 2.30265212059021, |
|
"eval_runtime": 240.64, |
|
"eval_samples_per_second": 831.117, |
|
"eval_steps_per_second": 51.945, |
|
"step": 408000 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 3.3893333333333335e-07, |
|
"loss": 2.4531, |
|
"step": 416000 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"eval_loss": 2.3050363063812256, |
|
"eval_runtime": 240.3135, |
|
"eval_samples_per_second": 832.246, |
|
"eval_steps_per_second": 52.015, |
|
"step": 416000 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"eval_loss": 2.302565574645996, |
|
"eval_runtime": 239.3844, |
|
"eval_samples_per_second": 835.476, |
|
"eval_steps_per_second": 52.217, |
|
"step": 424000 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 3.3619999999999995e-07, |
|
"loss": 2.4539, |
|
"step": 432000 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"eval_loss": 2.2928755283355713, |
|
"eval_runtime": 241.5914, |
|
"eval_samples_per_second": 827.844, |
|
"eval_steps_per_second": 51.74, |
|
"step": 432000 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"eval_loss": 2.305102825164795, |
|
"eval_runtime": 241.5249, |
|
"eval_samples_per_second": 828.072, |
|
"eval_steps_per_second": 51.754, |
|
"step": 440000 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 3.3346666666666666e-07, |
|
"loss": 2.4499, |
|
"step": 448000 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"eval_loss": 2.3034677505493164, |
|
"eval_runtime": 241.4767, |
|
"eval_samples_per_second": 828.237, |
|
"eval_steps_per_second": 51.765, |
|
"step": 448000 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"eval_loss": 2.301123857498169, |
|
"eval_runtime": 239.2116, |
|
"eval_samples_per_second": 836.08, |
|
"eval_steps_per_second": 52.255, |
|
"step": 456000 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 3.307333333333333e-07, |
|
"loss": 2.4401, |
|
"step": 464000 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"eval_loss": 2.291990280151367, |
|
"eval_runtime": 241.865, |
|
"eval_samples_per_second": 826.907, |
|
"eval_steps_per_second": 51.682, |
|
"step": 464000 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"eval_loss": 2.299881935119629, |
|
"eval_runtime": 239.8579, |
|
"eval_samples_per_second": 833.827, |
|
"eval_steps_per_second": 52.114, |
|
"step": 472000 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 3.28e-07, |
|
"loss": 2.4401, |
|
"step": 480000 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"eval_loss": 2.303424119949341, |
|
"eval_runtime": 240.1835, |
|
"eval_samples_per_second": 832.697, |
|
"eval_steps_per_second": 52.044, |
|
"step": 480000 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"eval_loss": 2.30208683013916, |
|
"eval_runtime": 239.7131, |
|
"eval_samples_per_second": 834.331, |
|
"eval_steps_per_second": 52.146, |
|
"step": 488000 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 3.252666666666667e-07, |
|
"loss": 2.4433, |
|
"step": 496000 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"eval_loss": 2.3102383613586426, |
|
"eval_runtime": 239.6539, |
|
"eval_samples_per_second": 834.537, |
|
"eval_steps_per_second": 52.159, |
|
"step": 496000 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"eval_loss": 2.2985267639160156, |
|
"eval_runtime": 239.3303, |
|
"eval_samples_per_second": 835.665, |
|
"eval_steps_per_second": 52.229, |
|
"step": 504000 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 3.2253333333333334e-07, |
|
"loss": 2.4445, |
|
"step": 512000 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"eval_loss": 2.3018200397491455, |
|
"eval_runtime": 239.5684, |
|
"eval_samples_per_second": 834.835, |
|
"eval_steps_per_second": 52.177, |
|
"step": 512000 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"eval_loss": 2.2995855808258057, |
|
"eval_runtime": 242.6094, |
|
"eval_samples_per_second": 824.37, |
|
"eval_steps_per_second": 51.523, |
|
"step": 520000 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 3.198e-07, |
|
"loss": 2.4379, |
|
"step": 528000 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"eval_loss": 2.3006343841552734, |
|
"eval_runtime": 241.6485, |
|
"eval_samples_per_second": 827.648, |
|
"eval_steps_per_second": 51.728, |
|
"step": 528000 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"eval_loss": 2.2969799041748047, |
|
"eval_runtime": 240.3777, |
|
"eval_samples_per_second": 832.024, |
|
"eval_steps_per_second": 52.001, |
|
"step": 536000 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 3.1706666666666665e-07, |
|
"loss": 2.4454, |
|
"step": 544000 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"eval_loss": 2.30135178565979, |
|
"eval_runtime": 241.5787, |
|
"eval_samples_per_second": 827.887, |
|
"eval_steps_per_second": 51.743, |
|
"step": 544000 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"eval_loss": 2.2992091178894043, |
|
"eval_runtime": 241.4245, |
|
"eval_samples_per_second": 828.416, |
|
"eval_steps_per_second": 51.776, |
|
"step": 552000 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 3.1433333333333336e-07, |
|
"loss": 2.4457, |
|
"step": 560000 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"eval_loss": 2.2961883544921875, |
|
"eval_runtime": 242.857, |
|
"eval_samples_per_second": 823.53, |
|
"eval_steps_per_second": 51.471, |
|
"step": 560000 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"eval_loss": 2.300924301147461, |
|
"eval_runtime": 253.3263, |
|
"eval_samples_per_second": 789.495, |
|
"eval_steps_per_second": 49.343, |
|
"step": 568000 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 3.116e-07, |
|
"loss": 2.4354, |
|
"step": 576000 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"eval_loss": 2.2960214614868164, |
|
"eval_runtime": 251.8236, |
|
"eval_samples_per_second": 794.207, |
|
"eval_steps_per_second": 49.638, |
|
"step": 576000 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"eval_loss": 2.3008430004119873, |
|
"eval_runtime": 255.1, |
|
"eval_samples_per_second": 784.006, |
|
"eval_steps_per_second": 49.0, |
|
"step": 584000 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 3.0886666666666667e-07, |
|
"loss": 2.4361, |
|
"step": 592000 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"eval_loss": 2.289783239364624, |
|
"eval_runtime": 253.1447, |
|
"eval_samples_per_second": 790.062, |
|
"eval_steps_per_second": 49.379, |
|
"step": 592000 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"eval_loss": 2.306014060974121, |
|
"eval_runtime": 253.3664, |
|
"eval_samples_per_second": 789.371, |
|
"eval_steps_per_second": 49.336, |
|
"step": 600000 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 3.061333333333333e-07, |
|
"loss": 2.4377, |
|
"step": 608000 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"eval_loss": 2.298966884613037, |
|
"eval_runtime": 254.531, |
|
"eval_samples_per_second": 785.759, |
|
"eval_steps_per_second": 49.11, |
|
"step": 608000 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"eval_loss": 2.298924684524536, |
|
"eval_runtime": 253.6979, |
|
"eval_samples_per_second": 788.339, |
|
"eval_steps_per_second": 49.271, |
|
"step": 616000 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 3.034e-07, |
|
"loss": 2.4416, |
|
"step": 624000 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"eval_loss": 2.296922206878662, |
|
"eval_runtime": 252.7348, |
|
"eval_samples_per_second": 791.343, |
|
"eval_steps_per_second": 49.459, |
|
"step": 624000 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"eval_loss": 2.2932701110839844, |
|
"eval_runtime": 249.9057, |
|
"eval_samples_per_second": 800.302, |
|
"eval_steps_per_second": 50.019, |
|
"step": 632000 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 3.0066666666666663e-07, |
|
"loss": 2.434, |
|
"step": 640000 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"eval_loss": 2.29982852935791, |
|
"eval_runtime": 250.7868, |
|
"eval_samples_per_second": 797.49, |
|
"eval_steps_per_second": 49.843, |
|
"step": 640000 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"eval_loss": 2.294802665710449, |
|
"eval_runtime": 251.6635, |
|
"eval_samples_per_second": 794.712, |
|
"eval_steps_per_second": 49.669, |
|
"step": 648000 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 2.9793333333333334e-07, |
|
"loss": 2.433, |
|
"step": 656000 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"eval_loss": 2.2891652584075928, |
|
"eval_runtime": 250.4466, |
|
"eval_samples_per_second": 798.574, |
|
"eval_steps_per_second": 49.911, |
|
"step": 656000 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"eval_loss": 2.292858362197876, |
|
"eval_runtime": 254.5309, |
|
"eval_samples_per_second": 785.759, |
|
"eval_steps_per_second": 49.11, |
|
"step": 664000 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 2.952e-07, |
|
"loss": 2.44, |
|
"step": 672000 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"eval_loss": 2.2973103523254395, |
|
"eval_runtime": 250.641, |
|
"eval_samples_per_second": 797.954, |
|
"eval_steps_per_second": 49.872, |
|
"step": 672000 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"eval_loss": 2.292598009109497, |
|
"eval_runtime": 250.7654, |
|
"eval_samples_per_second": 797.558, |
|
"eval_steps_per_second": 49.847, |
|
"step": 680000 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 2.9246666666666665e-07, |
|
"loss": 2.4291, |
|
"step": 688000 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"eval_loss": 2.298957109451294, |
|
"eval_runtime": 249.6027, |
|
"eval_samples_per_second": 801.273, |
|
"eval_steps_per_second": 50.08, |
|
"step": 688000 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"eval_loss": 2.2936558723449707, |
|
"eval_runtime": 254.0075, |
|
"eval_samples_per_second": 787.378, |
|
"eval_steps_per_second": 49.211, |
|
"step": 696000 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 2.897333333333333e-07, |
|
"loss": 2.4336, |
|
"step": 704000 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"eval_loss": 2.2894132137298584, |
|
"eval_runtime": 251.6948, |
|
"eval_samples_per_second": 794.613, |
|
"eval_steps_per_second": 49.663, |
|
"step": 704000 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_loss": 2.295807123184204, |
|
"eval_runtime": 250.3917, |
|
"eval_samples_per_second": 798.748, |
|
"eval_steps_per_second": 49.922, |
|
"step": 712000 |
|
}, |
|
{ |
|
"epoch": 3.03, |
|
"learning_rate": 2.8699999999999996e-07, |
|
"loss": 2.439, |
|
"step": 720000 |
|
}, |
|
{ |
|
"epoch": 3.03, |
|
"eval_loss": 2.295633554458618, |
|
"eval_runtime": 249.5836, |
|
"eval_samples_per_second": 801.335, |
|
"eval_steps_per_second": 50.083, |
|
"step": 720000 |
|
}, |
|
{ |
|
"epoch": 3.07, |
|
"eval_loss": 2.2927510738372803, |
|
"eval_runtime": 252.6433, |
|
"eval_samples_per_second": 791.63, |
|
"eval_steps_per_second": 49.477, |
|
"step": 728000 |
|
}, |
|
{ |
|
"epoch": 3.1, |
|
"learning_rate": 2.8426666666666667e-07, |
|
"loss": 2.4405, |
|
"step": 736000 |
|
}, |
|
{ |
|
"epoch": 3.1, |
|
"eval_loss": 2.295588970184326, |
|
"eval_runtime": 254.3928, |
|
"eval_samples_per_second": 786.186, |
|
"eval_steps_per_second": 49.137, |
|
"step": 736000 |
|
}, |
|
{ |
|
"epoch": 3.13, |
|
"eval_loss": 2.290548324584961, |
|
"eval_runtime": 251.5538, |
|
"eval_samples_per_second": 795.058, |
|
"eval_steps_per_second": 49.691, |
|
"step": 744000 |
|
}, |
|
{ |
|
"epoch": 3.17, |
|
"learning_rate": 2.815333333333333e-07, |
|
"loss": 2.4332, |
|
"step": 752000 |
|
}, |
|
{ |
|
"epoch": 3.17, |
|
"eval_loss": 2.2921102046966553, |
|
"eval_runtime": 250.7706, |
|
"eval_samples_per_second": 797.542, |
|
"eval_steps_per_second": 49.846, |
|
"step": 752000 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"eval_loss": 2.2907326221466064, |
|
"eval_runtime": 252.731, |
|
"eval_samples_per_second": 791.355, |
|
"eval_steps_per_second": 49.46, |
|
"step": 760000 |
|
}, |
|
{ |
|
"epoch": 3.23, |
|
"learning_rate": 2.7880000000000003e-07, |
|
"loss": 2.4353, |
|
"step": 768000 |
|
}, |
|
{ |
|
"epoch": 3.23, |
|
"eval_loss": 2.2879087924957275, |
|
"eval_runtime": 249.8593, |
|
"eval_samples_per_second": 800.45, |
|
"eval_steps_per_second": 50.028, |
|
"step": 768000 |
|
}, |
|
{ |
|
"epoch": 3.27, |
|
"eval_loss": 2.292853355407715, |
|
"eval_runtime": 255.5473, |
|
"eval_samples_per_second": 782.634, |
|
"eval_steps_per_second": 48.915, |
|
"step": 776000 |
|
}, |
|
{ |
|
"epoch": 3.3, |
|
"learning_rate": 2.7606666666666664e-07, |
|
"loss": 2.4273, |
|
"step": 784000 |
|
}, |
|
{ |
|
"epoch": 3.3, |
|
"eval_loss": 2.29532527923584, |
|
"eval_runtime": 249.121, |
|
"eval_samples_per_second": 802.823, |
|
"eval_steps_per_second": 50.176, |
|
"step": 784000 |
|
}, |
|
{ |
|
"epoch": 3.33, |
|
"eval_loss": 2.2917468547821045, |
|
"eval_runtime": 252.7811, |
|
"eval_samples_per_second": 791.198, |
|
"eval_steps_per_second": 49.45, |
|
"step": 792000 |
|
}, |
|
{ |
|
"epoch": 3.37, |
|
"learning_rate": 2.733333333333333e-07, |
|
"loss": 2.4233, |
|
"step": 800000 |
|
}, |
|
{ |
|
"epoch": 3.37, |
|
"eval_loss": 2.294677734375, |
|
"eval_runtime": 251.4379, |
|
"eval_samples_per_second": 795.425, |
|
"eval_steps_per_second": 49.714, |
|
"step": 800000 |
|
}, |
|
{ |
|
"epoch": 3.4, |
|
"eval_loss": 2.294262170791626, |
|
"eval_runtime": 249.692, |
|
"eval_samples_per_second": 800.987, |
|
"eval_steps_per_second": 50.062, |
|
"step": 808000 |
|
}, |
|
{ |
|
"epoch": 3.44, |
|
"learning_rate": 2.706e-07, |
|
"loss": 2.4324, |
|
"step": 816000 |
|
}, |
|
{ |
|
"epoch": 3.44, |
|
"eval_loss": 2.2940258979797363, |
|
"eval_runtime": 251.4645, |
|
"eval_samples_per_second": 795.341, |
|
"eval_steps_per_second": 49.709, |
|
"step": 816000 |
|
}, |
|
{ |
|
"epoch": 3.47, |
|
"eval_loss": 2.291130781173706, |
|
"eval_runtime": 250.9475, |
|
"eval_samples_per_second": 796.979, |
|
"eval_steps_per_second": 49.811, |
|
"step": 824000 |
|
}, |
|
{ |
|
"epoch": 3.5, |
|
"learning_rate": 2.6786666666666666e-07, |
|
"loss": 2.4461, |
|
"step": 832000 |
|
}, |
|
{ |
|
"epoch": 3.5, |
|
"eval_loss": 2.2919955253601074, |
|
"eval_runtime": 251.5475, |
|
"eval_samples_per_second": 795.078, |
|
"eval_steps_per_second": 49.692, |
|
"step": 832000 |
|
}, |
|
{ |
|
"epoch": 3.54, |
|
"eval_loss": 2.291116952896118, |
|
"eval_runtime": 247.564, |
|
"eval_samples_per_second": 807.872, |
|
"eval_steps_per_second": 50.492, |
|
"step": 840000 |
|
}, |
|
{ |
|
"epoch": 3.57, |
|
"learning_rate": 2.651333333333333e-07, |
|
"loss": 2.4267, |
|
"step": 848000 |
|
}, |
|
{ |
|
"epoch": 3.57, |
|
"eval_loss": 2.294036865234375, |
|
"eval_runtime": 248.8076, |
|
"eval_samples_per_second": 803.834, |
|
"eval_steps_per_second": 50.24, |
|
"step": 848000 |
|
}, |
|
{ |
|
"epoch": 3.6, |
|
"eval_loss": 2.2889564037323, |
|
"eval_runtime": 250.7173, |
|
"eval_samples_per_second": 797.711, |
|
"eval_steps_per_second": 49.857, |
|
"step": 856000 |
|
}, |
|
{ |
|
"epoch": 3.64, |
|
"learning_rate": 2.624e-07, |
|
"loss": 2.4313, |
|
"step": 864000 |
|
}, |
|
{ |
|
"epoch": 3.64, |
|
"eval_loss": 2.2913272380828857, |
|
"eval_runtime": 251.8602, |
|
"eval_samples_per_second": 794.091, |
|
"eval_steps_per_second": 49.631, |
|
"step": 864000 |
|
}, |
|
{ |
|
"epoch": 3.67, |
|
"eval_loss": 2.296712875366211, |
|
"eval_runtime": 250.3469, |
|
"eval_samples_per_second": 798.891, |
|
"eval_steps_per_second": 49.931, |
|
"step": 872000 |
|
}, |
|
{ |
|
"epoch": 3.71, |
|
"learning_rate": 2.596666666666667e-07, |
|
"loss": 2.4388, |
|
"step": 880000 |
|
}, |
|
{ |
|
"epoch": 3.71, |
|
"eval_loss": 2.29068660736084, |
|
"eval_runtime": 252.8984, |
|
"eval_samples_per_second": 790.831, |
|
"eval_steps_per_second": 49.427, |
|
"step": 880000 |
|
}, |
|
{ |
|
"epoch": 3.74, |
|
"eval_loss": 2.295198440551758, |
|
"eval_runtime": 250.9204, |
|
"eval_samples_per_second": 797.065, |
|
"eval_steps_per_second": 49.817, |
|
"step": 888000 |
|
}, |
|
{ |
|
"epoch": 3.77, |
|
"learning_rate": 2.5693333333333333e-07, |
|
"loss": 2.4326, |
|
"step": 896000 |
|
}, |
|
{ |
|
"epoch": 3.77, |
|
"eval_loss": 2.287299394607544, |
|
"eval_runtime": 250.3015, |
|
"eval_samples_per_second": 799.036, |
|
"eval_steps_per_second": 49.94, |
|
"step": 896000 |
|
}, |
|
{ |
|
"epoch": 3.81, |
|
"eval_loss": 2.287095308303833, |
|
"eval_runtime": 251.9856, |
|
"eval_samples_per_second": 793.696, |
|
"eval_steps_per_second": 49.606, |
|
"step": 904000 |
|
}, |
|
{ |
|
"epoch": 3.84, |
|
"learning_rate": 2.542e-07, |
|
"loss": 2.4312, |
|
"step": 912000 |
|
}, |
|
{ |
|
"epoch": 3.84, |
|
"eval_loss": 2.2880072593688965, |
|
"eval_runtime": 250.0793, |
|
"eval_samples_per_second": 799.746, |
|
"eval_steps_per_second": 49.984, |
|
"step": 912000 |
|
}, |
|
{ |
|
"epoch": 3.87, |
|
"eval_loss": 2.2941174507141113, |
|
"eval_runtime": 250.5896, |
|
"eval_samples_per_second": 798.118, |
|
"eval_steps_per_second": 49.882, |
|
"step": 920000 |
|
}, |
|
{ |
|
"epoch": 3.91, |
|
"learning_rate": 2.5146666666666664e-07, |
|
"loss": 2.4398, |
|
"step": 928000 |
|
}, |
|
{ |
|
"epoch": 3.91, |
|
"eval_loss": 2.292538642883301, |
|
"eval_runtime": 248.6711, |
|
"eval_samples_per_second": 804.275, |
|
"eval_steps_per_second": 50.267, |
|
"step": 928000 |
|
}, |
|
{ |
|
"epoch": 3.94, |
|
"eval_loss": 2.296539545059204, |
|
"eval_runtime": 250.5232, |
|
"eval_samples_per_second": 798.329, |
|
"eval_steps_per_second": 49.896, |
|
"step": 936000 |
|
}, |
|
{ |
|
"epoch": 3.97, |
|
"learning_rate": 2.4873333333333335e-07, |
|
"loss": 2.441, |
|
"step": 944000 |
|
}, |
|
{ |
|
"epoch": 3.97, |
|
"eval_loss": 2.289044141769409, |
|
"eval_runtime": 251.7273, |
|
"eval_samples_per_second": 794.511, |
|
"eval_steps_per_second": 49.657, |
|
"step": 944000 |
|
}, |
|
{ |
|
"epoch": 4.01, |
|
"eval_loss": 2.2945752143859863, |
|
"eval_runtime": 250.3966, |
|
"eval_samples_per_second": 798.733, |
|
"eval_steps_per_second": 49.921, |
|
"step": 952000 |
|
}, |
|
{ |
|
"epoch": 4.04, |
|
"learning_rate": 2.46e-07, |
|
"loss": 2.4345, |
|
"step": 960000 |
|
}, |
|
{ |
|
"epoch": 4.04, |
|
"eval_loss": 2.291003704071045, |
|
"eval_runtime": 250.1466, |
|
"eval_samples_per_second": 799.531, |
|
"eval_steps_per_second": 49.971, |
|
"step": 960000 |
|
}, |
|
{ |
|
"epoch": 4.08, |
|
"eval_loss": 2.279160737991333, |
|
"eval_runtime": 251.3138, |
|
"eval_samples_per_second": 795.818, |
|
"eval_steps_per_second": 49.739, |
|
"step": 968000 |
|
}, |
|
{ |
|
"epoch": 4.11, |
|
"learning_rate": 2.4326666666666666e-07, |
|
"loss": 2.4332, |
|
"step": 976000 |
|
}, |
|
{ |
|
"epoch": 4.11, |
|
"eval_loss": 2.285647392272949, |
|
"eval_runtime": 251.6666, |
|
"eval_samples_per_second": 794.702, |
|
"eval_steps_per_second": 49.669, |
|
"step": 976000 |
|
}, |
|
{ |
|
"epoch": 4.14, |
|
"eval_loss": 2.2878894805908203, |
|
"eval_runtime": 249.5029, |
|
"eval_samples_per_second": 801.594, |
|
"eval_steps_per_second": 50.1, |
|
"step": 984000 |
|
}, |
|
{ |
|
"epoch": 4.18, |
|
"learning_rate": 2.405333333333333e-07, |
|
"loss": 2.4375, |
|
"step": 992000 |
|
}, |
|
{ |
|
"epoch": 4.18, |
|
"eval_loss": 2.2861104011535645, |
|
"eval_runtime": 249.5689, |
|
"eval_samples_per_second": 801.382, |
|
"eval_steps_per_second": 50.086, |
|
"step": 992000 |
|
}, |
|
{ |
|
"epoch": 4.21, |
|
"eval_loss": 2.2892367839813232, |
|
"eval_runtime": 250.1781, |
|
"eval_samples_per_second": 799.43, |
|
"eval_steps_per_second": 49.964, |
|
"step": 1000000 |
|
}, |
|
{ |
|
"epoch": 4.24, |
|
"learning_rate": 2.3779999999999997e-07, |
|
"loss": 2.4282, |
|
"step": 1008000 |
|
}, |
|
{ |
|
"epoch": 4.24, |
|
"eval_loss": 2.2894413471221924, |
|
"eval_runtime": 252.3508, |
|
"eval_samples_per_second": 792.548, |
|
"eval_steps_per_second": 49.534, |
|
"step": 1008000 |
|
}, |
|
{ |
|
"epoch": 4.28, |
|
"eval_loss": 2.290153980255127, |
|
"eval_runtime": 250.4413, |
|
"eval_samples_per_second": 798.59, |
|
"eval_steps_per_second": 49.912, |
|
"step": 1016000 |
|
}, |
|
{ |
|
"epoch": 4.31, |
|
"learning_rate": 2.3506666666666668e-07, |
|
"loss": 2.4231, |
|
"step": 1024000 |
|
}, |
|
{ |
|
"epoch": 4.31, |
|
"eval_loss": 2.282951831817627, |
|
"eval_runtime": 251.0777, |
|
"eval_samples_per_second": 796.566, |
|
"eval_steps_per_second": 49.785, |
|
"step": 1024000 |
|
}, |
|
{ |
|
"epoch": 4.35, |
|
"eval_loss": 2.294783353805542, |
|
"eval_runtime": 249.9563, |
|
"eval_samples_per_second": 800.14, |
|
"eval_steps_per_second": 50.009, |
|
"step": 1032000 |
|
}, |
|
{ |
|
"epoch": 4.38, |
|
"learning_rate": 2.3233333333333334e-07, |
|
"loss": 2.4299, |
|
"step": 1040000 |
|
}, |
|
{ |
|
"epoch": 4.38, |
|
"eval_loss": 2.2915213108062744, |
|
"eval_runtime": 251.5141, |
|
"eval_samples_per_second": 795.184, |
|
"eval_steps_per_second": 49.699, |
|
"step": 1040000 |
|
}, |
|
{ |
|
"epoch": 4.41, |
|
"eval_loss": 2.2921574115753174, |
|
"eval_runtime": 250.6578, |
|
"eval_samples_per_second": 797.9, |
|
"eval_steps_per_second": 49.869, |
|
"step": 1048000 |
|
}, |
|
{ |
|
"epoch": 4.45, |
|
"learning_rate": 2.2960000000000002e-07, |
|
"loss": 2.4353, |
|
"step": 1056000 |
|
}, |
|
{ |
|
"epoch": 4.45, |
|
"eval_loss": 2.287623882293701, |
|
"eval_runtime": 249.9183, |
|
"eval_samples_per_second": 800.262, |
|
"eval_steps_per_second": 50.016, |
|
"step": 1056000 |
|
}, |
|
{ |
|
"epoch": 4.48, |
|
"eval_loss": 2.2892725467681885, |
|
"eval_runtime": 249.4102, |
|
"eval_samples_per_second": 801.892, |
|
"eval_steps_per_second": 50.118, |
|
"step": 1064000 |
|
}, |
|
{ |
|
"epoch": 4.51, |
|
"learning_rate": 2.2686666666666667e-07, |
|
"loss": 2.4308, |
|
"step": 1072000 |
|
}, |
|
{ |
|
"epoch": 4.51, |
|
"eval_loss": 2.292046070098877, |
|
"eval_runtime": 247.5504, |
|
"eval_samples_per_second": 807.916, |
|
"eval_steps_per_second": 50.495, |
|
"step": 1072000 |
|
}, |
|
{ |
|
"epoch": 4.55, |
|
"eval_loss": 2.285968780517578, |
|
"eval_runtime": 247.5335, |
|
"eval_samples_per_second": 807.971, |
|
"eval_steps_per_second": 50.498, |
|
"step": 1080000 |
|
}, |
|
{ |
|
"epoch": 4.58, |
|
"learning_rate": 2.2413333333333333e-07, |
|
"loss": 2.4358, |
|
"step": 1088000 |
|
}, |
|
{ |
|
"epoch": 4.58, |
|
"eval_loss": 2.2907297611236572, |
|
"eval_runtime": 248.9275, |
|
"eval_samples_per_second": 803.447, |
|
"eval_steps_per_second": 50.215, |
|
"step": 1088000 |
|
}, |
|
{ |
|
"epoch": 4.61, |
|
"eval_loss": 2.2807979583740234, |
|
"eval_runtime": 249.3116, |
|
"eval_samples_per_second": 802.209, |
|
"eval_steps_per_second": 50.138, |
|
"step": 1096000 |
|
}, |
|
{ |
|
"epoch": 4.65, |
|
"learning_rate": 2.214e-07, |
|
"loss": 2.4341, |
|
"step": 1104000 |
|
}, |
|
{ |
|
"epoch": 4.65, |
|
"eval_loss": 2.290224313735962, |
|
"eval_runtime": 249.5198, |
|
"eval_samples_per_second": 801.54, |
|
"eval_steps_per_second": 50.096, |
|
"step": 1104000 |
|
}, |
|
{ |
|
"epoch": 4.68, |
|
"eval_loss": 2.281525135040283, |
|
"eval_runtime": 249.0717, |
|
"eval_samples_per_second": 802.982, |
|
"eval_steps_per_second": 50.186, |
|
"step": 1112000 |
|
}, |
|
{ |
|
"epoch": 4.72, |
|
"learning_rate": 2.1866666666666667e-07, |
|
"loss": 2.4315, |
|
"step": 1120000 |
|
}, |
|
{ |
|
"epoch": 4.72, |
|
"eval_loss": 2.296067237854004, |
|
"eval_runtime": 246.4788, |
|
"eval_samples_per_second": 811.429, |
|
"eval_steps_per_second": 50.714, |
|
"step": 1120000 |
|
}, |
|
{ |
|
"epoch": 4.75, |
|
"eval_loss": 2.288472890853882, |
|
"eval_runtime": 243.6257, |
|
"eval_samples_per_second": 820.932, |
|
"eval_steps_per_second": 51.308, |
|
"step": 1128000 |
|
}, |
|
{ |
|
"epoch": 4.78, |
|
"learning_rate": 2.1593333333333332e-07, |
|
"loss": 2.434, |
|
"step": 1136000 |
|
}, |
|
{ |
|
"epoch": 4.78, |
|
"eval_loss": 2.2916722297668457, |
|
"eval_runtime": 242.6981, |
|
"eval_samples_per_second": 824.069, |
|
"eval_steps_per_second": 51.504, |
|
"step": 1136000 |
|
}, |
|
{ |
|
"epoch": 4.82, |
|
"eval_loss": 2.285139560699463, |
|
"eval_runtime": 243.3147, |
|
"eval_samples_per_second": 821.981, |
|
"eval_steps_per_second": 51.374, |
|
"step": 1144000 |
|
}, |
|
{ |
|
"epoch": 4.85, |
|
"learning_rate": 2.132e-07, |
|
"loss": 2.4324, |
|
"step": 1152000 |
|
}, |
|
{ |
|
"epoch": 4.85, |
|
"eval_loss": 2.2837140560150146, |
|
"eval_runtime": 243.7667, |
|
"eval_samples_per_second": 820.457, |
|
"eval_steps_per_second": 51.279, |
|
"step": 1152000 |
|
}, |
|
{ |
|
"epoch": 4.88, |
|
"eval_loss": 2.288276195526123, |
|
"eval_runtime": 243.0553, |
|
"eval_samples_per_second": 822.858, |
|
"eval_steps_per_second": 51.429, |
|
"step": 1160000 |
|
}, |
|
{ |
|
"epoch": 4.92, |
|
"learning_rate": 2.1046666666666666e-07, |
|
"loss": 2.4297, |
|
"step": 1168000 |
|
}, |
|
{ |
|
"epoch": 4.92, |
|
"eval_loss": 2.282362222671509, |
|
"eval_runtime": 243.4119, |
|
"eval_samples_per_second": 821.652, |
|
"eval_steps_per_second": 51.353, |
|
"step": 1168000 |
|
}, |
|
{ |
|
"epoch": 4.95, |
|
"eval_loss": 2.283245086669922, |
|
"eval_runtime": 242.8004, |
|
"eval_samples_per_second": 823.722, |
|
"eval_steps_per_second": 51.483, |
|
"step": 1176000 |
|
}, |
|
{ |
|
"epoch": 4.99, |
|
"learning_rate": 2.0773333333333334e-07, |
|
"loss": 2.436, |
|
"step": 1184000 |
|
}, |
|
{ |
|
"epoch": 4.99, |
|
"eval_loss": 2.286545515060425, |
|
"eval_runtime": 243.7626, |
|
"eval_samples_per_second": 820.471, |
|
"eval_steps_per_second": 51.279, |
|
"step": 1184000 |
|
}, |
|
{ |
|
"epoch": 5.02, |
|
"eval_loss": 2.281639337539673, |
|
"eval_runtime": 242.7206, |
|
"eval_samples_per_second": 823.993, |
|
"eval_steps_per_second": 51.5, |
|
"step": 1192000 |
|
}, |
|
{ |
|
"epoch": 5.05, |
|
"learning_rate": 2.05e-07, |
|
"loss": 2.4329, |
|
"step": 1200000 |
|
}, |
|
{ |
|
"epoch": 5.05, |
|
"eval_loss": 2.2862467765808105, |
|
"eval_runtime": 242.9508, |
|
"eval_samples_per_second": 823.212, |
|
"eval_steps_per_second": 51.451, |
|
"step": 1200000 |
|
}, |
|
{ |
|
"epoch": 5.09, |
|
"eval_loss": 2.284660577774048, |
|
"eval_runtime": 244.035, |
|
"eval_samples_per_second": 819.554, |
|
"eval_steps_per_second": 51.222, |
|
"step": 1208000 |
|
}, |
|
{ |
|
"epoch": 5.12, |
|
"learning_rate": 2.0226666666666668e-07, |
|
"loss": 2.4276, |
|
"step": 1216000 |
|
}, |
|
{ |
|
"epoch": 5.12, |
|
"eval_loss": 2.295132875442505, |
|
"eval_runtime": 243.547, |
|
"eval_samples_per_second": 821.197, |
|
"eval_steps_per_second": 51.325, |
|
"step": 1216000 |
|
}, |
|
{ |
|
"epoch": 5.15, |
|
"eval_loss": 2.297971487045288, |
|
"eval_runtime": 243.0459, |
|
"eval_samples_per_second": 822.89, |
|
"eval_steps_per_second": 51.431, |
|
"step": 1224000 |
|
}, |
|
{ |
|
"epoch": 5.19, |
|
"learning_rate": 1.9953333333333333e-07, |
|
"loss": 2.4362, |
|
"step": 1232000 |
|
}, |
|
{ |
|
"epoch": 5.19, |
|
"eval_loss": 2.2889420986175537, |
|
"eval_runtime": 245.3823, |
|
"eval_samples_per_second": 815.055, |
|
"eval_steps_per_second": 50.941, |
|
"step": 1232000 |
|
}, |
|
{ |
|
"epoch": 5.22, |
|
"eval_loss": 2.2913663387298584, |
|
"eval_runtime": 243.0295, |
|
"eval_samples_per_second": 822.945, |
|
"eval_steps_per_second": 51.434, |
|
"step": 1240000 |
|
}, |
|
{ |
|
"epoch": 5.25, |
|
"learning_rate": 1.968e-07, |
|
"loss": 2.4309, |
|
"step": 1248000 |
|
}, |
|
{ |
|
"epoch": 5.25, |
|
"eval_loss": 2.2914836406707764, |
|
"eval_runtime": 245.1791, |
|
"eval_samples_per_second": 815.73, |
|
"eval_steps_per_second": 50.983, |
|
"step": 1248000 |
|
}, |
|
{ |
|
"epoch": 5.29, |
|
"eval_loss": 2.2822399139404297, |
|
"eval_runtime": 243.1242, |
|
"eval_samples_per_second": 822.625, |
|
"eval_steps_per_second": 51.414, |
|
"step": 1256000 |
|
}, |
|
{ |
|
"epoch": 5.32, |
|
"learning_rate": 1.9406666666666667e-07, |
|
"loss": 2.4414, |
|
"step": 1264000 |
|
}, |
|
{ |
|
"epoch": 5.32, |
|
"eval_loss": 2.287137269973755, |
|
"eval_runtime": 243.2167, |
|
"eval_samples_per_second": 822.312, |
|
"eval_steps_per_second": 51.395, |
|
"step": 1264000 |
|
}, |
|
{ |
|
"epoch": 5.36, |
|
"eval_loss": 2.2890405654907227, |
|
"eval_runtime": 246.4538, |
|
"eval_samples_per_second": 811.511, |
|
"eval_steps_per_second": 50.719, |
|
"step": 1272000 |
|
}, |
|
{ |
|
"epoch": 5.39, |
|
"learning_rate": 1.9133333333333333e-07, |
|
"loss": 2.4241, |
|
"step": 1280000 |
|
}, |
|
{ |
|
"epoch": 5.39, |
|
"eval_loss": 2.2844035625457764, |
|
"eval_runtime": 243.8151, |
|
"eval_samples_per_second": 820.294, |
|
"eval_steps_per_second": 51.268, |
|
"step": 1280000 |
|
}, |
|
{ |
|
"epoch": 5.42, |
|
"eval_loss": 2.281158924102783, |
|
"eval_runtime": 244.4232, |
|
"eval_samples_per_second": 818.253, |
|
"eval_steps_per_second": 51.141, |
|
"step": 1288000 |
|
}, |
|
{ |
|
"epoch": 5.46, |
|
"learning_rate": 1.886e-07, |
|
"loss": 2.4251, |
|
"step": 1296000 |
|
}, |
|
{ |
|
"epoch": 5.46, |
|
"eval_loss": 2.2873995304107666, |
|
"eval_runtime": 245.6666, |
|
"eval_samples_per_second": 814.111, |
|
"eval_steps_per_second": 50.882, |
|
"step": 1296000 |
|
}, |
|
{ |
|
"epoch": 5.49, |
|
"eval_loss": 2.2846264839172363, |
|
"eval_runtime": 244.5062, |
|
"eval_samples_per_second": 817.975, |
|
"eval_steps_per_second": 51.123, |
|
"step": 1304000 |
|
}, |
|
{ |
|
"epoch": 5.52, |
|
"learning_rate": 1.8586666666666666e-07, |
|
"loss": 2.4318, |
|
"step": 1312000 |
|
}, |
|
{ |
|
"epoch": 5.52, |
|
"eval_loss": 2.283116340637207, |
|
"eval_runtime": 245.4301, |
|
"eval_samples_per_second": 814.896, |
|
"eval_steps_per_second": 50.931, |
|
"step": 1312000 |
|
}, |
|
{ |
|
"epoch": 5.56, |
|
"eval_loss": 2.2895309925079346, |
|
"eval_runtime": 244.3142, |
|
"eval_samples_per_second": 818.618, |
|
"eval_steps_per_second": 51.164, |
|
"step": 1320000 |
|
}, |
|
{ |
|
"epoch": 5.59, |
|
"learning_rate": 1.8313333333333332e-07, |
|
"loss": 2.4247, |
|
"step": 1328000 |
|
}, |
|
{ |
|
"epoch": 5.59, |
|
"eval_loss": 2.2795751094818115, |
|
"eval_runtime": 246.3547, |
|
"eval_samples_per_second": 811.838, |
|
"eval_steps_per_second": 50.74, |
|
"step": 1328000 |
|
}, |
|
{ |
|
"epoch": 5.63, |
|
"eval_loss": 2.2833728790283203, |
|
"eval_runtime": 244.3038, |
|
"eval_samples_per_second": 818.653, |
|
"eval_steps_per_second": 51.166, |
|
"step": 1336000 |
|
}, |
|
{ |
|
"epoch": 5.66, |
|
"learning_rate": 1.804e-07, |
|
"loss": 2.4305, |
|
"step": 1344000 |
|
}, |
|
{ |
|
"epoch": 5.66, |
|
"eval_loss": 2.2810542583465576, |
|
"eval_runtime": 245.8027, |
|
"eval_samples_per_second": 813.661, |
|
"eval_steps_per_second": 50.854, |
|
"step": 1344000 |
|
}, |
|
{ |
|
"epoch": 5.69, |
|
"eval_loss": 2.2922263145446777, |
|
"eval_runtime": 244.3162, |
|
"eval_samples_per_second": 818.611, |
|
"eval_steps_per_second": 51.163, |
|
"step": 1352000 |
|
}, |
|
{ |
|
"epoch": 5.73, |
|
"learning_rate": 1.7766666666666666e-07, |
|
"loss": 2.4336, |
|
"step": 1360000 |
|
}, |
|
{ |
|
"epoch": 5.73, |
|
"eval_loss": 2.2830028533935547, |
|
"eval_runtime": 245.6363, |
|
"eval_samples_per_second": 814.212, |
|
"eval_steps_per_second": 50.888, |
|
"step": 1360000 |
|
}, |
|
{ |
|
"epoch": 5.76, |
|
"eval_loss": 2.2903530597686768, |
|
"eval_runtime": 244.3808, |
|
"eval_samples_per_second": 818.395, |
|
"eval_steps_per_second": 51.15, |
|
"step": 1368000 |
|
}, |
|
{ |
|
"epoch": 5.79, |
|
"learning_rate": 1.7493333333333334e-07, |
|
"loss": 2.428, |
|
"step": 1376000 |
|
}, |
|
{ |
|
"epoch": 5.79, |
|
"eval_loss": 2.284269332885742, |
|
"eval_runtime": 243.8962, |
|
"eval_samples_per_second": 820.021, |
|
"eval_steps_per_second": 51.251, |
|
"step": 1376000 |
|
}, |
|
{ |
|
"epoch": 5.83, |
|
"eval_loss": 2.2804477214813232, |
|
"eval_runtime": 243.7523, |
|
"eval_samples_per_second": 820.505, |
|
"eval_steps_per_second": 51.282, |
|
"step": 1384000 |
|
}, |
|
{ |
|
"epoch": 5.86, |
|
"learning_rate": 1.722e-07, |
|
"loss": 2.4254, |
|
"step": 1392000 |
|
}, |
|
{ |
|
"epoch": 5.86, |
|
"eval_loss": 2.285198211669922, |
|
"eval_runtime": 244.494, |
|
"eval_samples_per_second": 818.016, |
|
"eval_steps_per_second": 51.126, |
|
"step": 1392000 |
|
}, |
|
{ |
|
"epoch": 5.89, |
|
"eval_loss": 2.285767078399658, |
|
"eval_runtime": 245.3316, |
|
"eval_samples_per_second": 815.223, |
|
"eval_steps_per_second": 50.951, |
|
"step": 1400000 |
|
}, |
|
{ |
|
"epoch": 5.93, |
|
"learning_rate": 1.6946666666666668e-07, |
|
"loss": 2.4287, |
|
"step": 1408000 |
|
}, |
|
{ |
|
"epoch": 5.93, |
|
"eval_loss": 2.2921693325042725, |
|
"eval_runtime": 246.0396, |
|
"eval_samples_per_second": 812.877, |
|
"eval_steps_per_second": 50.805, |
|
"step": 1408000 |
|
}, |
|
{ |
|
"epoch": 5.96, |
|
"eval_loss": 2.2846784591674805, |
|
"eval_runtime": 243.9013, |
|
"eval_samples_per_second": 820.004, |
|
"eval_steps_per_second": 51.25, |
|
"step": 1416000 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"learning_rate": 1.6673333333333333e-07, |
|
"loss": 2.4291, |
|
"step": 1424000 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_loss": 2.285550117492676, |
|
"eval_runtime": 244.514, |
|
"eval_samples_per_second": 817.949, |
|
"eval_steps_per_second": 51.122, |
|
"step": 1424000 |
|
}, |
|
{ |
|
"epoch": 6.03, |
|
"eval_loss": 2.287602186203003, |
|
"eval_runtime": 245.7265, |
|
"eval_samples_per_second": 813.913, |
|
"eval_steps_per_second": 50.87, |
|
"step": 1432000 |
|
}, |
|
{ |
|
"epoch": 6.06, |
|
"learning_rate": 1.64e-07, |
|
"loss": 2.4289, |
|
"step": 1440000 |
|
}, |
|
{ |
|
"epoch": 6.06, |
|
"eval_loss": 2.282216787338257, |
|
"eval_runtime": 247.3389, |
|
"eval_samples_per_second": 808.607, |
|
"eval_steps_per_second": 50.538, |
|
"step": 1440000 |
|
}, |
|
{ |
|
"epoch": 6.1, |
|
"eval_loss": 2.2787294387817383, |
|
"eval_runtime": 247.7887, |
|
"eval_samples_per_second": 807.139, |
|
"eval_steps_per_second": 50.446, |
|
"step": 1448000 |
|
}, |
|
{ |
|
"epoch": 6.13, |
|
"learning_rate": 1.6126666666666667e-07, |
|
"loss": 2.4272, |
|
"step": 1456000 |
|
}, |
|
{ |
|
"epoch": 6.13, |
|
"eval_loss": 2.2810943126678467, |
|
"eval_runtime": 244.5008, |
|
"eval_samples_per_second": 817.993, |
|
"eval_steps_per_second": 51.125, |
|
"step": 1456000 |
|
}, |
|
{ |
|
"epoch": 6.16, |
|
"eval_loss": 2.285348415374756, |
|
"eval_runtime": 246.9617, |
|
"eval_samples_per_second": 809.842, |
|
"eval_steps_per_second": 50.615, |
|
"step": 1464000 |
|
}, |
|
{ |
|
"epoch": 6.2, |
|
"learning_rate": 1.5853333333333332e-07, |
|
"loss": 2.4267, |
|
"step": 1472000 |
|
}, |
|
{ |
|
"epoch": 6.2, |
|
"eval_loss": 2.2818119525909424, |
|
"eval_runtime": 245.9708, |
|
"eval_samples_per_second": 813.105, |
|
"eval_steps_per_second": 50.819, |
|
"step": 1472000 |
|
}, |
|
{ |
|
"epoch": 6.23, |
|
"eval_loss": 2.2764692306518555, |
|
"eval_runtime": 249.5295, |
|
"eval_samples_per_second": 801.509, |
|
"eval_steps_per_second": 50.094, |
|
"step": 1480000 |
|
}, |
|
{ |
|
"epoch": 6.27, |
|
"learning_rate": 1.558e-07, |
|
"loss": 2.4237, |
|
"step": 1488000 |
|
}, |
|
{ |
|
"epoch": 6.27, |
|
"eval_loss": 2.2790732383728027, |
|
"eval_runtime": 247.115, |
|
"eval_samples_per_second": 809.34, |
|
"eval_steps_per_second": 50.584, |
|
"step": 1488000 |
|
}, |
|
{ |
|
"epoch": 6.3, |
|
"eval_loss": 2.2768290042877197, |
|
"eval_runtime": 246.6491, |
|
"eval_samples_per_second": 810.868, |
|
"eval_steps_per_second": 50.679, |
|
"step": 1496000 |
|
}, |
|
{ |
|
"epoch": 6.33, |
|
"learning_rate": 1.5306666666666666e-07, |
|
"loss": 2.4277, |
|
"step": 1504000 |
|
}, |
|
{ |
|
"epoch": 6.33, |
|
"eval_loss": 2.286600351333618, |
|
"eval_runtime": 247.5007, |
|
"eval_samples_per_second": 808.078, |
|
"eval_steps_per_second": 50.505, |
|
"step": 1504000 |
|
}, |
|
{ |
|
"epoch": 6.37, |
|
"eval_loss": 2.282073974609375, |
|
"eval_runtime": 245.5734, |
|
"eval_samples_per_second": 814.42, |
|
"eval_steps_per_second": 50.901, |
|
"step": 1512000 |
|
}, |
|
{ |
|
"epoch": 6.4, |
|
"learning_rate": 1.5033333333333332e-07, |
|
"loss": 2.4316, |
|
"step": 1520000 |
|
}, |
|
{ |
|
"epoch": 6.4, |
|
"eval_loss": 2.285618305206299, |
|
"eval_runtime": 245.7388, |
|
"eval_samples_per_second": 813.872, |
|
"eval_steps_per_second": 50.867, |
|
"step": 1520000 |
|
}, |
|
{ |
|
"epoch": 6.43, |
|
"eval_loss": 2.2820215225219727, |
|
"eval_runtime": 247.4957, |
|
"eval_samples_per_second": 808.095, |
|
"eval_steps_per_second": 50.506, |
|
"step": 1528000 |
|
}, |
|
{ |
|
"epoch": 6.47, |
|
"learning_rate": 1.476e-07, |
|
"loss": 2.4222, |
|
"step": 1536000 |
|
}, |
|
{ |
|
"epoch": 6.47, |
|
"eval_loss": 2.2891499996185303, |
|
"eval_runtime": 245.6614, |
|
"eval_samples_per_second": 814.129, |
|
"eval_steps_per_second": 50.883, |
|
"step": 1536000 |
|
}, |
|
{ |
|
"epoch": 6.5, |
|
"eval_loss": 2.2802817821502686, |
|
"eval_runtime": 246.4523, |
|
"eval_samples_per_second": 811.516, |
|
"eval_steps_per_second": 50.72, |
|
"step": 1544000 |
|
}, |
|
{ |
|
"epoch": 6.53, |
|
"learning_rate": 1.4486666666666665e-07, |
|
"loss": 2.426, |
|
"step": 1552000 |
|
}, |
|
{ |
|
"epoch": 6.53, |
|
"eval_loss": 2.279686689376831, |
|
"eval_runtime": 247.3876, |
|
"eval_samples_per_second": 808.448, |
|
"eval_steps_per_second": 50.528, |
|
"step": 1552000 |
|
}, |
|
{ |
|
"epoch": 6.57, |
|
"eval_loss": 2.284360408782959, |
|
"eval_runtime": 246.8856, |
|
"eval_samples_per_second": 810.092, |
|
"eval_steps_per_second": 50.631, |
|
"step": 1560000 |
|
}, |
|
{ |
|
"epoch": 6.6, |
|
"learning_rate": 1.4213333333333334e-07, |
|
"loss": 2.422, |
|
"step": 1568000 |
|
}, |
|
{ |
|
"epoch": 6.6, |
|
"eval_loss": 2.287179708480835, |
|
"eval_runtime": 245.0018, |
|
"eval_samples_per_second": 816.321, |
|
"eval_steps_per_second": 51.02, |
|
"step": 1568000 |
|
}, |
|
{ |
|
"epoch": 6.64, |
|
"eval_loss": 2.2903947830200195, |
|
"eval_runtime": 244.8984, |
|
"eval_samples_per_second": 816.665, |
|
"eval_steps_per_second": 51.042, |
|
"step": 1576000 |
|
}, |
|
{ |
|
"epoch": 6.67, |
|
"learning_rate": 1.3940000000000002e-07, |
|
"loss": 2.4323, |
|
"step": 1584000 |
|
}, |
|
{ |
|
"epoch": 6.67, |
|
"eval_loss": 2.2796852588653564, |
|
"eval_runtime": 245.135, |
|
"eval_samples_per_second": 815.877, |
|
"eval_steps_per_second": 50.992, |
|
"step": 1584000 |
|
}, |
|
{ |
|
"epoch": 6.7, |
|
"eval_loss": 2.275681495666504, |
|
"eval_runtime": 247.5616, |
|
"eval_samples_per_second": 807.88, |
|
"eval_steps_per_second": 50.492, |
|
"step": 1592000 |
|
}, |
|
{ |
|
"epoch": 6.74, |
|
"learning_rate": 1.3666666666666665e-07, |
|
"loss": 2.4315, |
|
"step": 1600000 |
|
}, |
|
{ |
|
"epoch": 6.74, |
|
"eval_loss": 2.287421703338623, |
|
"eval_runtime": 247.5879, |
|
"eval_samples_per_second": 807.794, |
|
"eval_steps_per_second": 50.487, |
|
"step": 1600000 |
|
}, |
|
{ |
|
"epoch": 6.77, |
|
"eval_loss": 2.276327133178711, |
|
"eval_runtime": 245.1196, |
|
"eval_samples_per_second": 815.928, |
|
"eval_steps_per_second": 50.996, |
|
"step": 1608000 |
|
}, |
|
{ |
|
"epoch": 6.8, |
|
"learning_rate": 1.3393333333333333e-07, |
|
"loss": 2.421, |
|
"step": 1616000 |
|
}, |
|
{ |
|
"epoch": 6.8, |
|
"eval_loss": 2.2857308387756348, |
|
"eval_runtime": 245.1941, |
|
"eval_samples_per_second": 815.68, |
|
"eval_steps_per_second": 50.98, |
|
"step": 1616000 |
|
}, |
|
{ |
|
"epoch": 6.84, |
|
"eval_loss": 2.280374765396118, |
|
"eval_runtime": 247.8166, |
|
"eval_samples_per_second": 807.048, |
|
"eval_steps_per_second": 50.441, |
|
"step": 1624000 |
|
}, |
|
{ |
|
"epoch": 6.87, |
|
"learning_rate": 1.312e-07, |
|
"loss": 2.4299, |
|
"step": 1632000 |
|
}, |
|
{ |
|
"epoch": 6.87, |
|
"eval_loss": 2.282515287399292, |
|
"eval_runtime": 245.4773, |
|
"eval_samples_per_second": 814.739, |
|
"eval_steps_per_second": 50.921, |
|
"step": 1632000 |
|
}, |
|
{ |
|
"epoch": 6.91, |
|
"eval_loss": 2.2819290161132812, |
|
"eval_runtime": 247.6539, |
|
"eval_samples_per_second": 807.579, |
|
"eval_steps_per_second": 50.474, |
|
"step": 1640000 |
|
}, |
|
{ |
|
"epoch": 6.94, |
|
"learning_rate": 1.2846666666666667e-07, |
|
"loss": 2.4289, |
|
"step": 1648000 |
|
}, |
|
{ |
|
"epoch": 6.94, |
|
"eval_loss": 2.28240704536438, |
|
"eval_runtime": 245.8148, |
|
"eval_samples_per_second": 813.621, |
|
"eval_steps_per_second": 50.851, |
|
"step": 1648000 |
|
}, |
|
{ |
|
"epoch": 6.97, |
|
"eval_loss": 2.282073974609375, |
|
"eval_runtime": 247.9192, |
|
"eval_samples_per_second": 806.714, |
|
"eval_steps_per_second": 50.42, |
|
"step": 1656000 |
|
}, |
|
{ |
|
"epoch": 7.01, |
|
"learning_rate": 1.2573333333333332e-07, |
|
"loss": 2.4257, |
|
"step": 1664000 |
|
}, |
|
{ |
|
"epoch": 7.01, |
|
"eval_loss": 2.2802059650421143, |
|
"eval_runtime": 248.4226, |
|
"eval_samples_per_second": 805.08, |
|
"eval_steps_per_second": 50.317, |
|
"step": 1664000 |
|
}, |
|
{ |
|
"epoch": 7.04, |
|
"eval_loss": 2.2760419845581055, |
|
"eval_runtime": 246.2143, |
|
"eval_samples_per_second": 812.3, |
|
"eval_steps_per_second": 50.769, |
|
"step": 1672000 |
|
}, |
|
{ |
|
"epoch": 7.07, |
|
"learning_rate": 1.23e-07, |
|
"loss": 2.4227, |
|
"step": 1680000 |
|
}, |
|
{ |
|
"epoch": 7.07, |
|
"eval_loss": 2.28104305267334, |
|
"eval_runtime": 247.8478, |
|
"eval_samples_per_second": 806.947, |
|
"eval_steps_per_second": 50.434, |
|
"step": 1680000 |
|
}, |
|
{ |
|
"epoch": 7.11, |
|
"eval_loss": 2.2776732444763184, |
|
"eval_runtime": 245.7431, |
|
"eval_samples_per_second": 813.858, |
|
"eval_steps_per_second": 50.866, |
|
"step": 1688000 |
|
}, |
|
{ |
|
"epoch": 7.14, |
|
"learning_rate": 1.2026666666666666e-07, |
|
"loss": 2.4287, |
|
"step": 1696000 |
|
}, |
|
{ |
|
"epoch": 7.14, |
|
"eval_loss": 2.277235984802246, |
|
"eval_runtime": 245.5934, |
|
"eval_samples_per_second": 814.354, |
|
"eval_steps_per_second": 50.897, |
|
"step": 1696000 |
|
}, |
|
{ |
|
"epoch": 7.17, |
|
"eval_loss": 2.2786269187927246, |
|
"eval_runtime": 246.4934, |
|
"eval_samples_per_second": 811.381, |
|
"eval_steps_per_second": 50.711, |
|
"step": 1704000 |
|
}, |
|
{ |
|
"epoch": 7.21, |
|
"learning_rate": 1.1753333333333334e-07, |
|
"loss": 2.4227, |
|
"step": 1712000 |
|
}, |
|
{ |
|
"epoch": 7.21, |
|
"eval_loss": 2.285905361175537, |
|
"eval_runtime": 246.4981, |
|
"eval_samples_per_second": 811.365, |
|
"eval_steps_per_second": 50.71, |
|
"step": 1712000 |
|
}, |
|
{ |
|
"epoch": 7.24, |
|
"eval_loss": 2.2862019538879395, |
|
"eval_runtime": 247.148, |
|
"eval_samples_per_second": 809.232, |
|
"eval_steps_per_second": 50.577, |
|
"step": 1720000 |
|
}, |
|
{ |
|
"epoch": 7.28, |
|
"learning_rate": 1.1480000000000001e-07, |
|
"loss": 2.4262, |
|
"step": 1728000 |
|
}, |
|
{ |
|
"epoch": 7.28, |
|
"eval_loss": 2.278900146484375, |
|
"eval_runtime": 246.4039, |
|
"eval_samples_per_second": 811.675, |
|
"eval_steps_per_second": 50.73, |
|
"step": 1728000 |
|
}, |
|
{ |
|
"epoch": 7.31, |
|
"eval_loss": 2.2848124504089355, |
|
"eval_runtime": 246.3089, |
|
"eval_samples_per_second": 811.988, |
|
"eval_steps_per_second": 50.749, |
|
"step": 1736000 |
|
}, |
|
{ |
|
"epoch": 7.34, |
|
"learning_rate": 1.1206666666666666e-07, |
|
"loss": 2.4263, |
|
"step": 1744000 |
|
}, |
|
{ |
|
"epoch": 7.34, |
|
"eval_loss": 2.275432586669922, |
|
"eval_runtime": 246.8456, |
|
"eval_samples_per_second": 810.223, |
|
"eval_steps_per_second": 50.639, |
|
"step": 1744000 |
|
}, |
|
{ |
|
"epoch": 7.38, |
|
"eval_loss": 2.2777817249298096, |
|
"eval_runtime": 247.2848, |
|
"eval_samples_per_second": 808.784, |
|
"eval_steps_per_second": 50.549, |
|
"step": 1752000 |
|
}, |
|
{ |
|
"epoch": 7.41, |
|
"learning_rate": 1.0933333333333333e-07, |
|
"loss": 2.4246, |
|
"step": 1760000 |
|
}, |
|
{ |
|
"epoch": 7.41, |
|
"eval_loss": 2.27353572845459, |
|
"eval_runtime": 247.6065, |
|
"eval_samples_per_second": 807.733, |
|
"eval_steps_per_second": 50.483, |
|
"step": 1760000 |
|
}, |
|
{ |
|
"epoch": 7.44, |
|
"eval_loss": 2.2827255725860596, |
|
"eval_runtime": 246.5965, |
|
"eval_samples_per_second": 811.041, |
|
"eval_steps_per_second": 50.69, |
|
"step": 1768000 |
|
}, |
|
{ |
|
"epoch": 7.48, |
|
"learning_rate": 1.066e-07, |
|
"loss": 2.4147, |
|
"step": 1776000 |
|
}, |
|
{ |
|
"epoch": 7.48, |
|
"eval_loss": 2.285012722015381, |
|
"eval_runtime": 246.8181, |
|
"eval_samples_per_second": 810.313, |
|
"eval_steps_per_second": 50.645, |
|
"step": 1776000 |
|
}, |
|
{ |
|
"epoch": 7.51, |
|
"eval_loss": 2.282083511352539, |
|
"eval_runtime": 246.9583, |
|
"eval_samples_per_second": 809.853, |
|
"eval_steps_per_second": 50.616, |
|
"step": 1784000 |
|
}, |
|
{ |
|
"epoch": 7.55, |
|
"learning_rate": 1.0386666666666667e-07, |
|
"loss": 2.4288, |
|
"step": 1792000 |
|
}, |
|
{ |
|
"epoch": 7.55, |
|
"eval_loss": 2.280324935913086, |
|
"eval_runtime": 246.1706, |
|
"eval_samples_per_second": 812.445, |
|
"eval_steps_per_second": 50.778, |
|
"step": 1792000 |
|
}, |
|
{ |
|
"epoch": 7.58, |
|
"eval_loss": 2.2760393619537354, |
|
"eval_runtime": 246.0656, |
|
"eval_samples_per_second": 812.791, |
|
"eval_steps_per_second": 50.799, |
|
"step": 1800000 |
|
}, |
|
{ |
|
"epoch": 7.61, |
|
"learning_rate": 1.0113333333333334e-07, |
|
"loss": 2.4231, |
|
"step": 1808000 |
|
}, |
|
{ |
|
"epoch": 7.61, |
|
"eval_loss": 2.2748613357543945, |
|
"eval_runtime": 246.3683, |
|
"eval_samples_per_second": 811.793, |
|
"eval_steps_per_second": 50.737, |
|
"step": 1808000 |
|
}, |
|
{ |
|
"epoch": 7.65, |
|
"eval_loss": 2.2749080657958984, |
|
"eval_runtime": 246.3319, |
|
"eval_samples_per_second": 811.913, |
|
"eval_steps_per_second": 50.745, |
|
"step": 1816000 |
|
}, |
|
{ |
|
"epoch": 7.68, |
|
"learning_rate": 9.84e-08, |
|
"loss": 2.4243, |
|
"step": 1824000 |
|
}, |
|
{ |
|
"epoch": 7.68, |
|
"eval_loss": 2.274254322052002, |
|
"eval_runtime": 245.7767, |
|
"eval_samples_per_second": 813.747, |
|
"eval_steps_per_second": 50.859, |
|
"step": 1824000 |
|
}, |
|
{ |
|
"epoch": 7.71, |
|
"eval_loss": 2.2792415618896484, |
|
"eval_runtime": 247.2515, |
|
"eval_samples_per_second": 808.893, |
|
"eval_steps_per_second": 50.556, |
|
"step": 1832000 |
|
}, |
|
{ |
|
"epoch": 7.75, |
|
"learning_rate": 9.566666666666666e-08, |
|
"loss": 2.4215, |
|
"step": 1840000 |
|
}, |
|
{ |
|
"epoch": 7.75, |
|
"eval_loss": 2.275153398513794, |
|
"eval_runtime": 247.4479, |
|
"eval_samples_per_second": 808.251, |
|
"eval_steps_per_second": 50.516, |
|
"step": 1840000 |
|
}, |
|
{ |
|
"epoch": 7.78, |
|
"eval_loss": 2.2769758701324463, |
|
"eval_runtime": 247.8276, |
|
"eval_samples_per_second": 807.013, |
|
"eval_steps_per_second": 50.438, |
|
"step": 1848000 |
|
}, |
|
{ |
|
"epoch": 7.81, |
|
"learning_rate": 9.293333333333333e-08, |
|
"loss": 2.4213, |
|
"step": 1856000 |
|
}, |
|
{ |
|
"epoch": 7.81, |
|
"eval_loss": 2.280165672302246, |
|
"eval_runtime": 246.5191, |
|
"eval_samples_per_second": 811.296, |
|
"eval_steps_per_second": 50.706, |
|
"step": 1856000 |
|
}, |
|
{ |
|
"epoch": 7.85, |
|
"eval_loss": 2.27955961227417, |
|
"eval_runtime": 248.3255, |
|
"eval_samples_per_second": 805.394, |
|
"eval_steps_per_second": 50.337, |
|
"step": 1864000 |
|
}, |
|
{ |
|
"epoch": 7.88, |
|
"learning_rate": 9.02e-08, |
|
"loss": 2.4236, |
|
"step": 1872000 |
|
}, |
|
{ |
|
"epoch": 7.88, |
|
"eval_loss": 2.2882931232452393, |
|
"eval_runtime": 246.4462, |
|
"eval_samples_per_second": 811.536, |
|
"eval_steps_per_second": 50.721, |
|
"step": 1872000 |
|
}, |
|
{ |
|
"epoch": 7.92, |
|
"eval_loss": 2.279242515563965, |
|
"eval_runtime": 246.5281, |
|
"eval_samples_per_second": 811.266, |
|
"eval_steps_per_second": 50.704, |
|
"step": 1880000 |
|
}, |
|
{ |
|
"epoch": 7.95, |
|
"learning_rate": 8.746666666666667e-08, |
|
"loss": 2.4237, |
|
"step": 1888000 |
|
}, |
|
{ |
|
"epoch": 7.95, |
|
"eval_loss": 2.272642135620117, |
|
"eval_runtime": 247.5094, |
|
"eval_samples_per_second": 808.05, |
|
"eval_steps_per_second": 50.503, |
|
"step": 1888000 |
|
}, |
|
{ |
|
"epoch": 7.98, |
|
"eval_loss": 2.2816178798675537, |
|
"eval_runtime": 248.3802, |
|
"eval_samples_per_second": 805.217, |
|
"eval_steps_per_second": 50.326, |
|
"step": 1896000 |
|
}, |
|
{ |
|
"epoch": 8.02, |
|
"learning_rate": 8.473333333333334e-08, |
|
"loss": 2.4183, |
|
"step": 1904000 |
|
}, |
|
{ |
|
"epoch": 8.02, |
|
"eval_loss": 2.2790186405181885, |
|
"eval_runtime": 247.7385, |
|
"eval_samples_per_second": 807.303, |
|
"eval_steps_per_second": 50.456, |
|
"step": 1904000 |
|
}, |
|
{ |
|
"epoch": 8.05, |
|
"eval_loss": 2.2814745903015137, |
|
"eval_runtime": 248.3044, |
|
"eval_samples_per_second": 805.463, |
|
"eval_steps_per_second": 50.341, |
|
"step": 1912000 |
|
}, |
|
{ |
|
"epoch": 8.08, |
|
"learning_rate": 8.2e-08, |
|
"loss": 2.4215, |
|
"step": 1920000 |
|
}, |
|
{ |
|
"epoch": 8.08, |
|
"eval_loss": 2.277374744415283, |
|
"eval_runtime": 246.8648, |
|
"eval_samples_per_second": 810.16, |
|
"eval_steps_per_second": 50.635, |
|
"step": 1920000 |
|
}, |
|
{ |
|
"epoch": 8.12, |
|
"eval_loss": 2.270047664642334, |
|
"eval_runtime": 246.8413, |
|
"eval_samples_per_second": 810.237, |
|
"eval_steps_per_second": 50.64, |
|
"step": 1928000 |
|
}, |
|
{ |
|
"epoch": 8.15, |
|
"learning_rate": 7.926666666666666e-08, |
|
"loss": 2.4258, |
|
"step": 1936000 |
|
}, |
|
{ |
|
"epoch": 8.15, |
|
"eval_loss": 2.276344060897827, |
|
"eval_runtime": 246.9748, |
|
"eval_samples_per_second": 809.799, |
|
"eval_steps_per_second": 50.612, |
|
"step": 1936000 |
|
}, |
|
{ |
|
"epoch": 8.19, |
|
"eval_loss": 2.278593063354492, |
|
"eval_runtime": 248.7283, |
|
"eval_samples_per_second": 804.09, |
|
"eval_steps_per_second": 50.256, |
|
"step": 1944000 |
|
}, |
|
{ |
|
"epoch": 8.22, |
|
"learning_rate": 7.653333333333333e-08, |
|
"loss": 2.4209, |
|
"step": 1952000 |
|
}, |
|
{ |
|
"epoch": 8.22, |
|
"eval_loss": 2.2763326168060303, |
|
"eval_runtime": 247.5692, |
|
"eval_samples_per_second": 807.855, |
|
"eval_steps_per_second": 50.491, |
|
"step": 1952000 |
|
}, |
|
{ |
|
"epoch": 8.25, |
|
"eval_loss": 2.278881072998047, |
|
"eval_runtime": 247.8151, |
|
"eval_samples_per_second": 807.053, |
|
"eval_steps_per_second": 50.441, |
|
"step": 1960000 |
|
}, |
|
{ |
|
"epoch": 8.29, |
|
"learning_rate": 7.38e-08, |
|
"loss": 2.4217, |
|
"step": 1968000 |
|
}, |
|
{ |
|
"epoch": 8.29, |
|
"eval_loss": 2.2783920764923096, |
|
"eval_runtime": 248.9593, |
|
"eval_samples_per_second": 803.344, |
|
"eval_steps_per_second": 50.209, |
|
"step": 1968000 |
|
}, |
|
{ |
|
"epoch": 8.32, |
|
"eval_loss": 2.2773079872131348, |
|
"eval_runtime": 247.6818, |
|
"eval_samples_per_second": 807.488, |
|
"eval_steps_per_second": 50.468, |
|
"step": 1976000 |
|
}, |
|
{ |
|
"epoch": 8.35, |
|
"learning_rate": 7.106666666666667e-08, |
|
"loss": 2.4279, |
|
"step": 1984000 |
|
}, |
|
{ |
|
"epoch": 8.35, |
|
"eval_loss": 2.286058187484741, |
|
"eval_runtime": 247.013, |
|
"eval_samples_per_second": 809.674, |
|
"eval_steps_per_second": 50.605, |
|
"step": 1984000 |
|
}, |
|
{ |
|
"epoch": 8.39, |
|
"eval_loss": 2.2728214263916016, |
|
"eval_runtime": 247.152, |
|
"eval_samples_per_second": 809.219, |
|
"eval_steps_per_second": 50.576, |
|
"step": 1992000 |
|
}, |
|
{ |
|
"epoch": 8.42, |
|
"learning_rate": 6.833333333333332e-08, |
|
"loss": 2.4268, |
|
"step": 2000000 |
|
}, |
|
{ |
|
"epoch": 8.42, |
|
"eval_loss": 2.276221513748169, |
|
"eval_runtime": 247.3718, |
|
"eval_samples_per_second": 808.5, |
|
"eval_steps_per_second": 50.531, |
|
"step": 2000000 |
|
}, |
|
{ |
|
"epoch": 8.45, |
|
"eval_loss": 2.2788944244384766, |
|
"eval_runtime": 247.8322, |
|
"eval_samples_per_second": 806.998, |
|
"eval_steps_per_second": 50.437, |
|
"step": 2008000 |
|
}, |
|
{ |
|
"epoch": 8.49, |
|
"learning_rate": 6.56e-08, |
|
"loss": 2.4177, |
|
"step": 2016000 |
|
}, |
|
{ |
|
"epoch": 8.49, |
|
"eval_loss": 2.282210350036621, |
|
"eval_runtime": 247.9565, |
|
"eval_samples_per_second": 806.593, |
|
"eval_steps_per_second": 50.412, |
|
"step": 2016000 |
|
}, |
|
{ |
|
"epoch": 8.52, |
|
"eval_loss": 2.275873899459839, |
|
"eval_runtime": 247.5408, |
|
"eval_samples_per_second": 807.948, |
|
"eval_steps_per_second": 50.497, |
|
"step": 2024000 |
|
}, |
|
{ |
|
"epoch": 8.56, |
|
"learning_rate": 6.286666666666666e-08, |
|
"loss": 2.4166, |
|
"step": 2032000 |
|
}, |
|
{ |
|
"epoch": 8.56, |
|
"eval_loss": 2.2791593074798584, |
|
"eval_runtime": 248.9215, |
|
"eval_samples_per_second": 803.466, |
|
"eval_steps_per_second": 50.217, |
|
"step": 2032000 |
|
}, |
|
{ |
|
"epoch": 8.59, |
|
"eval_loss": 2.272076368331909, |
|
"eval_runtime": 248.0422, |
|
"eval_samples_per_second": 806.315, |
|
"eval_steps_per_second": 50.395, |
|
"step": 2040000 |
|
}, |
|
{ |
|
"epoch": 8.62, |
|
"learning_rate": 6.013333333333333e-08, |
|
"loss": 2.4223, |
|
"step": 2048000 |
|
}, |
|
{ |
|
"epoch": 8.62, |
|
"eval_loss": 2.2768054008483887, |
|
"eval_runtime": 247.9612, |
|
"eval_samples_per_second": 806.578, |
|
"eval_steps_per_second": 50.411, |
|
"step": 2048000 |
|
}, |
|
{ |
|
"epoch": 8.66, |
|
"eval_loss": 2.2726008892059326, |
|
"eval_runtime": 247.7176, |
|
"eval_samples_per_second": 807.371, |
|
"eval_steps_per_second": 50.461, |
|
"step": 2056000 |
|
}, |
|
{ |
|
"epoch": 8.69, |
|
"learning_rate": 5.7400000000000004e-08, |
|
"loss": 2.4139, |
|
"step": 2064000 |
|
}, |
|
{ |
|
"epoch": 8.69, |
|
"eval_loss": 2.282505512237549, |
|
"eval_runtime": 247.6026, |
|
"eval_samples_per_second": 807.746, |
|
"eval_steps_per_second": 50.484, |
|
"step": 2064000 |
|
}, |
|
{ |
|
"epoch": 8.72, |
|
"eval_loss": 2.2738897800445557, |
|
"eval_runtime": 248.4813, |
|
"eval_samples_per_second": 804.89, |
|
"eval_steps_per_second": 50.306, |
|
"step": 2072000 |
|
}, |
|
{ |
|
"epoch": 8.76, |
|
"learning_rate": 5.4666666666666666e-08, |
|
"loss": 2.4236, |
|
"step": 2080000 |
|
}, |
|
{ |
|
"epoch": 8.76, |
|
"eval_loss": 2.2834410667419434, |
|
"eval_runtime": 248.3802, |
|
"eval_samples_per_second": 805.217, |
|
"eval_steps_per_second": 50.326, |
|
"step": 2080000 |
|
}, |
|
{ |
|
"epoch": 8.79, |
|
"eval_loss": 2.2750093936920166, |
|
"eval_runtime": 247.5076, |
|
"eval_samples_per_second": 808.056, |
|
"eval_steps_per_second": 50.503, |
|
"step": 2088000 |
|
}, |
|
{ |
|
"epoch": 8.83, |
|
"learning_rate": 5.1933333333333335e-08, |
|
"loss": 2.4235, |
|
"step": 2096000 |
|
}, |
|
{ |
|
"epoch": 8.83, |
|
"eval_loss": 2.275216817855835, |
|
"eval_runtime": 248.1106, |
|
"eval_samples_per_second": 806.092, |
|
"eval_steps_per_second": 50.381, |
|
"step": 2096000 |
|
}, |
|
{ |
|
"epoch": 8.86, |
|
"eval_loss": 2.28031587600708, |
|
"eval_runtime": 247.6614, |
|
"eval_samples_per_second": 807.554, |
|
"eval_steps_per_second": 50.472, |
|
"step": 2104000 |
|
}, |
|
{ |
|
"epoch": 8.89, |
|
"learning_rate": 4.92e-08, |
|
"loss": 2.4193, |
|
"step": 2112000 |
|
}, |
|
{ |
|
"epoch": 8.89, |
|
"eval_loss": 2.2762908935546875, |
|
"eval_runtime": 247.8319, |
|
"eval_samples_per_second": 806.999, |
|
"eval_steps_per_second": 50.437, |
|
"step": 2112000 |
|
}, |
|
{ |
|
"epoch": 8.93, |
|
"eval_loss": 2.2754592895507812, |
|
"eval_runtime": 247.5134, |
|
"eval_samples_per_second": 808.037, |
|
"eval_steps_per_second": 50.502, |
|
"step": 2120000 |
|
}, |
|
{ |
|
"epoch": 8.96, |
|
"learning_rate": 4.6466666666666666e-08, |
|
"loss": 2.4179, |
|
"step": 2128000 |
|
}, |
|
{ |
|
"epoch": 8.96, |
|
"eval_loss": 2.279421091079712, |
|
"eval_runtime": 248.0477, |
|
"eval_samples_per_second": 806.297, |
|
"eval_steps_per_second": 50.394, |
|
"step": 2128000 |
|
}, |
|
{ |
|
"epoch": 8.99, |
|
"eval_loss": 2.2710611820220947, |
|
"eval_runtime": 247.3731, |
|
"eval_samples_per_second": 808.495, |
|
"eval_steps_per_second": 50.531, |
|
"step": 2136000 |
|
}, |
|
{ |
|
"epoch": 9.03, |
|
"learning_rate": 4.3733333333333335e-08, |
|
"loss": 2.4181, |
|
"step": 2144000 |
|
}, |
|
{ |
|
"epoch": 9.03, |
|
"eval_loss": 2.279233932495117, |
|
"eval_runtime": 248.7527, |
|
"eval_samples_per_second": 804.011, |
|
"eval_steps_per_second": 50.251, |
|
"step": 2144000 |
|
}, |
|
{ |
|
"epoch": 9.06, |
|
"eval_loss": 2.275193691253662, |
|
"eval_runtime": 248.241, |
|
"eval_samples_per_second": 805.669, |
|
"eval_steps_per_second": 50.354, |
|
"step": 2152000 |
|
}, |
|
{ |
|
"epoch": 9.09, |
|
"learning_rate": 4.1e-08, |
|
"loss": 2.4173, |
|
"step": 2160000 |
|
}, |
|
{ |
|
"epoch": 9.09, |
|
"eval_loss": 2.2775352001190186, |
|
"eval_runtime": 248.2377, |
|
"eval_samples_per_second": 805.679, |
|
"eval_steps_per_second": 50.355, |
|
"step": 2160000 |
|
}, |
|
{ |
|
"epoch": 9.13, |
|
"eval_loss": 2.2751924991607666, |
|
"eval_runtime": 248.4267, |
|
"eval_samples_per_second": 805.066, |
|
"eval_steps_per_second": 50.317, |
|
"step": 2168000 |
|
}, |
|
{ |
|
"epoch": 9.16, |
|
"learning_rate": 3.8266666666666665e-08, |
|
"loss": 2.4242, |
|
"step": 2176000 |
|
}, |
|
{ |
|
"epoch": 9.16, |
|
"eval_loss": 2.272923469543457, |
|
"eval_runtime": 248.2584, |
|
"eval_samples_per_second": 805.612, |
|
"eval_steps_per_second": 50.351, |
|
"step": 2176000 |
|
}, |
|
{ |
|
"epoch": 9.2, |
|
"eval_loss": 2.2793116569519043, |
|
"eval_runtime": 248.8342, |
|
"eval_samples_per_second": 803.748, |
|
"eval_steps_per_second": 50.234, |
|
"step": 2184000 |
|
}, |
|
{ |
|
"epoch": 9.23, |
|
"learning_rate": 3.5533333333333334e-08, |
|
"loss": 2.4166, |
|
"step": 2192000 |
|
}, |
|
{ |
|
"epoch": 9.23, |
|
"eval_loss": 2.2719197273254395, |
|
"eval_runtime": 249.6006, |
|
"eval_samples_per_second": 801.28, |
|
"eval_steps_per_second": 50.08, |
|
"step": 2192000 |
|
}, |
|
{ |
|
"epoch": 9.26, |
|
"eval_loss": 2.281975507736206, |
|
"eval_runtime": 248.1074, |
|
"eval_samples_per_second": 806.103, |
|
"eval_steps_per_second": 50.381, |
|
"step": 2200000 |
|
}, |
|
{ |
|
"epoch": 9.3, |
|
"learning_rate": 3.28e-08, |
|
"loss": 2.4181, |
|
"step": 2208000 |
|
}, |
|
{ |
|
"epoch": 9.3, |
|
"eval_loss": 2.2715933322906494, |
|
"eval_runtime": 249.4343, |
|
"eval_samples_per_second": 801.814, |
|
"eval_steps_per_second": 50.113, |
|
"step": 2208000 |
|
}, |
|
{ |
|
"epoch": 9.33, |
|
"eval_loss": 2.285536050796509, |
|
"eval_runtime": 250.2462, |
|
"eval_samples_per_second": 799.213, |
|
"eval_steps_per_second": 49.951, |
|
"step": 2216000 |
|
}, |
|
{ |
|
"epoch": 9.36, |
|
"learning_rate": 3.0066666666666665e-08, |
|
"loss": 2.4245, |
|
"step": 2224000 |
|
}, |
|
{ |
|
"epoch": 9.36, |
|
"eval_loss": 2.2805118560791016, |
|
"eval_runtime": 248.8621, |
|
"eval_samples_per_second": 803.658, |
|
"eval_steps_per_second": 50.229, |
|
"step": 2224000 |
|
}, |
|
{ |
|
"epoch": 9.4, |
|
"eval_loss": 2.272109270095825, |
|
"eval_runtime": 250.3664, |
|
"eval_samples_per_second": 798.829, |
|
"eval_steps_per_second": 49.927, |
|
"step": 2232000 |
|
}, |
|
{ |
|
"epoch": 9.43, |
|
"learning_rate": 2.7333333333333333e-08, |
|
"loss": 2.4204, |
|
"step": 2240000 |
|
}, |
|
{ |
|
"epoch": 9.43, |
|
"eval_loss": 2.270662546157837, |
|
"eval_runtime": 248.4336, |
|
"eval_samples_per_second": 805.044, |
|
"eval_steps_per_second": 50.315, |
|
"step": 2240000 |
|
}, |
|
{ |
|
"epoch": 9.47, |
|
"eval_loss": 2.2766847610473633, |
|
"eval_runtime": 248.6132, |
|
"eval_samples_per_second": 804.462, |
|
"eval_steps_per_second": 50.279, |
|
"step": 2248000 |
|
}, |
|
{ |
|
"epoch": 9.5, |
|
"learning_rate": 2.46e-08, |
|
"loss": 2.4255, |
|
"step": 2256000 |
|
}, |
|
{ |
|
"epoch": 9.5, |
|
"eval_loss": 2.2710325717926025, |
|
"eval_runtime": 248.6879, |
|
"eval_samples_per_second": 804.221, |
|
"eval_steps_per_second": 50.264, |
|
"step": 2256000 |
|
}, |
|
{ |
|
"epoch": 9.53, |
|
"eval_loss": 2.2814137935638428, |
|
"eval_runtime": 249.0563, |
|
"eval_samples_per_second": 803.031, |
|
"eval_steps_per_second": 50.189, |
|
"step": 2264000 |
|
}, |
|
{ |
|
"epoch": 9.57, |
|
"learning_rate": 2.1866666666666667e-08, |
|
"loss": 2.4254, |
|
"step": 2272000 |
|
}, |
|
{ |
|
"epoch": 9.57, |
|
"eval_loss": 2.274559259414673, |
|
"eval_runtime": 249.1887, |
|
"eval_samples_per_second": 802.605, |
|
"eval_steps_per_second": 50.163, |
|
"step": 2272000 |
|
}, |
|
{ |
|
"epoch": 9.6, |
|
"eval_loss": 2.2766289710998535, |
|
"eval_runtime": 248.6168, |
|
"eval_samples_per_second": 804.451, |
|
"eval_steps_per_second": 50.278, |
|
"step": 2280000 |
|
}, |
|
{ |
|
"epoch": 9.63, |
|
"learning_rate": 1.9133333333333333e-08, |
|
"loss": 2.4232, |
|
"step": 2288000 |
|
}, |
|
{ |
|
"epoch": 9.63, |
|
"eval_loss": 2.2724950313568115, |
|
"eval_runtime": 248.9678, |
|
"eval_samples_per_second": 803.317, |
|
"eval_steps_per_second": 50.207, |
|
"step": 2288000 |
|
}, |
|
{ |
|
"epoch": 9.67, |
|
"eval_loss": 2.2764828205108643, |
|
"eval_runtime": 248.7636, |
|
"eval_samples_per_second": 803.976, |
|
"eval_steps_per_second": 50.249, |
|
"step": 2296000 |
|
}, |
|
{ |
|
"epoch": 9.7, |
|
"learning_rate": 1.64e-08, |
|
"loss": 2.4189, |
|
"step": 2304000 |
|
}, |
|
{ |
|
"epoch": 9.7, |
|
"eval_loss": 2.275614023208618, |
|
"eval_runtime": 249.9482, |
|
"eval_samples_per_second": 800.166, |
|
"eval_steps_per_second": 50.01, |
|
"step": 2304000 |
|
}, |
|
{ |
|
"epoch": 9.73, |
|
"eval_loss": 2.2767763137817383, |
|
"eval_runtime": 248.8725, |
|
"eval_samples_per_second": 803.624, |
|
"eval_steps_per_second": 50.227, |
|
"step": 2312000 |
|
}, |
|
{ |
|
"epoch": 9.77, |
|
"learning_rate": 1.3666666666666667e-08, |
|
"loss": 2.4105, |
|
"step": 2320000 |
|
}, |
|
{ |
|
"epoch": 9.77, |
|
"eval_loss": 2.280364751815796, |
|
"eval_runtime": 248.8287, |
|
"eval_samples_per_second": 803.766, |
|
"eval_steps_per_second": 50.235, |
|
"step": 2320000 |
|
}, |
|
{ |
|
"epoch": 9.8, |
|
"eval_loss": 2.2873153686523438, |
|
"eval_runtime": 248.9747, |
|
"eval_samples_per_second": 803.295, |
|
"eval_steps_per_second": 50.206, |
|
"step": 2328000 |
|
}, |
|
{ |
|
"epoch": 9.84, |
|
"learning_rate": 1.0933333333333334e-08, |
|
"loss": 2.415, |
|
"step": 2336000 |
|
}, |
|
{ |
|
"epoch": 9.84, |
|
"eval_loss": 2.278338670730591, |
|
"eval_runtime": 248.9743, |
|
"eval_samples_per_second": 803.296, |
|
"eval_steps_per_second": 50.206, |
|
"step": 2336000 |
|
}, |
|
{ |
|
"epoch": 9.87, |
|
"eval_loss": 2.2737488746643066, |
|
"eval_runtime": 250.6409, |
|
"eval_samples_per_second": 797.954, |
|
"eval_steps_per_second": 49.872, |
|
"step": 2344000 |
|
}, |
|
{ |
|
"epoch": 9.9, |
|
"learning_rate": 8.2e-09, |
|
"loss": 2.4174, |
|
"step": 2352000 |
|
}, |
|
{ |
|
"epoch": 9.9, |
|
"eval_loss": 2.278608798980713, |
|
"eval_runtime": 251.8413, |
|
"eval_samples_per_second": 794.151, |
|
"eval_steps_per_second": 49.634, |
|
"step": 2352000 |
|
}, |
|
{ |
|
"epoch": 9.94, |
|
"eval_loss": 2.2729651927948, |
|
"eval_runtime": 250.8498, |
|
"eval_samples_per_second": 797.29, |
|
"eval_steps_per_second": 49.831, |
|
"step": 2360000 |
|
}, |
|
{ |
|
"epoch": 9.97, |
|
"learning_rate": 5.466666666666667e-09, |
|
"loss": 2.4199, |
|
"step": 2368000 |
|
}, |
|
{ |
|
"epoch": 9.97, |
|
"eval_loss": 2.2793538570404053, |
|
"eval_runtime": 253.8866, |
|
"eval_samples_per_second": 787.753, |
|
"eval_steps_per_second": 49.235, |
|
"step": 2368000 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_loss": 2.284799337387085, |
|
"eval_runtime": 252.1889, |
|
"eval_samples_per_second": 793.056, |
|
"eval_steps_per_second": 49.566, |
|
"step": 2376000 |
|
}, |
|
{ |
|
"epoch": 10.04, |
|
"learning_rate": 2.7333333333333334e-09, |
|
"loss": 2.4224, |
|
"step": 2384000 |
|
}, |
|
{ |
|
"epoch": 10.04, |
|
"eval_loss": 2.2810616493225098, |
|
"eval_runtime": 251.6013, |
|
"eval_samples_per_second": 794.909, |
|
"eval_steps_per_second": 49.682, |
|
"step": 2384000 |
|
}, |
|
{ |
|
"epoch": 10.07, |
|
"eval_loss": 2.281796932220459, |
|
"eval_runtime": 251.0825, |
|
"eval_samples_per_second": 796.551, |
|
"eval_steps_per_second": 49.784, |
|
"step": 2392000 |
|
}, |
|
{ |
|
"epoch": 10.11, |
|
"learning_rate": 0.0, |
|
"loss": 2.4226, |
|
"step": 2400000 |
|
}, |
|
{ |
|
"epoch": 10.11, |
|
"eval_loss": 2.279843330383301, |
|
"eval_runtime": 250.0989, |
|
"eval_samples_per_second": 799.684, |
|
"eval_steps_per_second": 49.98, |
|
"step": 2400000 |
|
}, |
|
{ |
|
"epoch": 10.11, |
|
"step": 2400000, |
|
"total_flos": 8.368611666112401e+17, |
|
"train_loss": 2.4345372688802085, |
|
"train_runtime": 232385.3236, |
|
"train_samples_per_second": 165.243, |
|
"train_steps_per_second": 10.328 |
|
} |
|
], |
|
"logging_steps": 16000, |
|
"max_steps": 2400000, |
|
"num_train_epochs": 11, |
|
"save_steps": 32000, |
|
"total_flos": 8.368611666112401e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|