{ "best_metric": null, "best_model_checkpoint": null, "epoch": 16.99889502762431, "global_step": 5763, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.38, "gpu_memory": 2825061888, "learning_rate": 8.32e-06, "loss": 4.6062, "step": 128 }, { "epoch": 0.75, "gpu_memory": 2903643648, "learning_rate": 1.664e-05, "loss": 2.7746, "step": 256 }, { "epoch": 1.0, "eval_bp": 0.021341648192077716, "eval_counts": [ 342, 58, 18, 6 ], "eval_loss": 2.035790205001831, "eval_precisions": [ 34.862385321100916, 8.516886930983848, 4.651162790697675, 2.3529411764705883 ], "eval_ref_len": 4755, "eval_runtime": 35.7733, "eval_samples_per_second": 8.386, "eval_score": 0.16113155714674393, "eval_steps_per_second": 8.386, "eval_sys_len": 981, "eval_totals": [ 981, 681, 387, 255 ], "gpu_memory": 2903643648, "step": 339 }, { "epoch": 1.13, "gpu_memory": 2903643648, "learning_rate": 2.4959999999999998e-05, "loss": 2.2201, "step": 384 }, { "epoch": 1.51, "gpu_memory": 2903643648, "learning_rate": 3.2437898089171974e-05, "loss": 1.9599, "step": 512 }, { "epoch": 1.89, "gpu_memory": 2903643648, "learning_rate": 3.1775477707006364e-05, "loss": 1.8228, "step": 640 }, { "epoch": 2.0, "eval_bp": 0.1919535866757935, "eval_counts": [ 640, 199, 91, 36 ], "eval_loss": 1.740516185760498, "eval_precisions": [ 35.67447045707915, 13.319946452476573, 7.526881720430108, 3.896103896103896 ], "eval_ref_len": 4755, "eval_runtime": 51.5291, "eval_samples_per_second": 5.822, "eval_score": 2.0855597670386987, "eval_steps_per_second": 5.822, "eval_sys_len": 1794, "eval_totals": [ 1794, 1494, 1209, 924 ], "gpu_memory": 2903643648, "step": 678 }, { "epoch": 2.27, "gpu_memory": 2903643648, "learning_rate": 3.111305732484076e-05, "loss": 1.7275, "step": 768 }, { "epoch": 2.64, "gpu_memory": 2903643648, "learning_rate": 3.0450636942675155e-05, "loss": 1.614, "step": 896 }, { "epoch": 3.0, "eval_bp": 0.157930307305936, "eval_counts": [ 662, 239, 127, 66 ], "eval_loss": 1.6653738021850586, "eval_precisions": [ 39.61699581089168, 17.432530999270604, 11.598173515981735, 8.02919708029197 ], "eval_ref_len": 4755, "eval_runtime": 46.1755, "eval_samples_per_second": 6.497, "eval_score": 2.515019790343611, "eval_steps_per_second": 6.497, "eval_sys_len": 1671, "eval_totals": [ 1671, 1371, 1095, 822 ], "gpu_memory": 2903643648, "step": 1017 }, { "epoch": 3.02, "gpu_memory": 2903643648, "learning_rate": 2.9788216560509553e-05, "loss": 1.561, "step": 1024 }, { "epoch": 3.4, "gpu_memory": 2903643648, "learning_rate": 2.9125796178343946e-05, "loss": 1.4029, "step": 1152 }, { "epoch": 3.77, "gpu_memory": 2903643648, "learning_rate": 2.8463375796178344e-05, "loss": 1.4541, "step": 1280 }, { "epoch": 4.0, "eval_bp": 0.06814983706797134, "eval_counts": [ 481, 162, 79, 37 ], "eval_loss": 1.6631227731704712, "eval_precisions": [ 37.286821705426355, 16.363636363636363, 10.881542699724518, 6.630824372759856 ], "eval_ref_len": 4755, "eval_runtime": 45.6186, "eval_samples_per_second": 6.576, "eval_score": 0.9871612910485801, "eval_steps_per_second": 6.576, "eval_sys_len": 1290, "eval_totals": [ 1290, 990, 726, 558 ], "gpu_memory": 2903643648, "step": 1356 }, { "epoch": 4.15, "gpu_memory": 2903643648, "learning_rate": 2.7800955414012737e-05, "loss": 1.4088, "step": 1408 }, { "epoch": 4.53, "gpu_memory": 2903643648, "learning_rate": 2.713853503184713e-05, "loss": 1.3351, "step": 1536 }, { "epoch": 4.91, "gpu_memory": 2903643648, "learning_rate": 2.647611464968153e-05, "loss": 1.3229, "step": 1664 }, { "epoch": 5.0, "eval_bp": 0.23640264658354365, "eval_counts": [ 633, 216, 105, 58 ], "eval_loss": 1.6731408834457397, "eval_precisions": [ 32.5115562403698, 13.114754098360656, 7.658643326039387, 5.239385727190605 ], "eval_ref_len": 4755, "eval_runtime": 51.3595, "eval_samples_per_second": 5.841, "eval_score": 2.703708498377427, "eval_steps_per_second": 5.841, "eval_sys_len": 1947, "eval_totals": [ 1947, 1647, 1371, 1107 ], "gpu_memory": 2903643648, "step": 1695 }, { "epoch": 5.29, "gpu_memory": 2903643648, "learning_rate": 2.5813694267515922e-05, "loss": 1.2429, "step": 1792 }, { "epoch": 5.66, "gpu_memory": 2903643648, "learning_rate": 2.515127388535032e-05, "loss": 1.2329, "step": 1920 }, { "epoch": 6.0, "eval_bp": 0.07532276614122083, "eval_counts": [ 579, 202, 98, 55 ], "eval_loss": 1.6539884805679321, "eval_precisions": [ 43.665158371040725, 19.68810916179337, 13.01460823373174, 9.499136442141623 ], "eval_ref_len": 4755, "eval_runtime": 46.8126, "eval_samples_per_second": 6.409, "eval_score": 1.3600028829560191, "eval_steps_per_second": 6.409, "eval_sys_len": 1326, "eval_totals": [ 1326, 1026, 753, 579 ], "gpu_memory": 2903643648, "step": 2034 }, { "epoch": 6.04, "gpu_memory": 2903643648, "learning_rate": 2.4488853503184713e-05, "loss": 1.2504, "step": 2048 }, { "epoch": 6.42, "gpu_memory": 2903643648, "learning_rate": 2.3826433121019104e-05, "loss": 1.1421, "step": 2176 }, { "epoch": 6.8, "gpu_memory": 2903643648, "learning_rate": 2.31640127388535e-05, "loss": 1.1795, "step": 2304 }, { "epoch": 7.0, "eval_bp": 0.17181721996808308, "eval_counts": [ 768, 262, 133, 70 ], "eval_loss": 1.667359471321106, "eval_precisions": [ 44.599303135888505, 18.424753867791843, 11.697449428320141, 8.018327605956472 ], "eval_ref_len": 4755, "eval_runtime": 50.5053, "eval_samples_per_second": 5.94, "eval_score": 2.862812289607837, "eval_steps_per_second": 5.94, "eval_sys_len": 1722, "eval_totals": [ 1722, 1422, 1137, 873 ], "gpu_memory": 2903643648, "step": 2373 }, { "epoch": 7.17, "gpu_memory": 2903643648, "learning_rate": 2.2501592356687895e-05, "loss": 1.0902, "step": 2432 }, { "epoch": 7.55, "gpu_memory": 2903643648, "learning_rate": 2.183917197452229e-05, "loss": 1.0705, "step": 2560 }, { "epoch": 7.93, "gpu_memory": 2903643648, "learning_rate": 2.1176751592356686e-05, "loss": 1.1128, "step": 2688 }, { "epoch": 8.0, "eval_bp": 0.2669632643662467, "eval_counts": [ 866, 300, 163, 96 ], "eval_loss": 1.708727240562439, "eval_precisions": [ 42.26451927769644, 17.152658662092623, 11.20274914089347, 8.226221079691516 ], "eval_ref_len": 4755, "eval_runtime": 53.4181, "eval_samples_per_second": 5.616, "eval_score": 4.291998839505449, "eval_steps_per_second": 5.616, "eval_sys_len": 2049, "eval_totals": [ 2049, 1749, 1455, 1167 ], "gpu_memory": 2903643648, "step": 2712 }, { "epoch": 8.31, "gpu_memory": 2903643648, "learning_rate": 2.051433121019108e-05, "loss": 1.0162, "step": 2816 }, { "epoch": 8.68, "gpu_memory": 2903643648, "learning_rate": 1.9851910828025477e-05, "loss": 1.0183, "step": 2944 }, { "epoch": 9.0, "eval_bp": 0.09731210069014802, "eval_counts": [ 678, 233, 102, 45 ], "eval_loss": 1.7135441303253174, "eval_precisions": [ 47.47899159663866, 20.656028368794328, 12.23021582733813, 7.142857142857143 ], "eval_ref_len": 4755, "eval_runtime": 50.1778, "eval_samples_per_second": 5.979, "eval_score": 1.664870454299152, "eval_steps_per_second": 5.979, "eval_sys_len": 1428, "eval_totals": [ 1428, 1128, 834, 630 ], "gpu_memory": 2903643648, "step": 3051 }, { "epoch": 9.06, "gpu_memory": 2903643648, "learning_rate": 1.918949044585987e-05, "loss": 1.0367, "step": 3072 }, { "epoch": 9.44, "gpu_memory": 2903643648, "learning_rate": 1.8527070063694264e-05, "loss": 0.9645, "step": 3200 }, { "epoch": 9.82, "gpu_memory": 2903643648, "learning_rate": 1.786464968152866e-05, "loss": 0.9616, "step": 3328 }, { "epoch": 10.0, "eval_bp": 0.22930577411313655, "eval_counts": [ 768, 280, 145, 80 ], "eval_loss": 1.736754298210144, "eval_precisions": [ 39.93759750390016, 17.25200246457178, 10.837070254110612, 7.428040854224698 ], "eval_ref_len": 4755, "eval_runtime": 57.956, "eval_samples_per_second": 5.176, "eval_score": 3.518980787396955, "eval_steps_per_second": 5.176, "eval_sys_len": 1923, "eval_totals": [ 1923, 1623, 1338, 1077 ], "gpu_memory": 2903643648, "step": 3390 }, { "epoch": 10.19, "gpu_memory": 2903643648, "learning_rate": 1.7202229299363055e-05, "loss": 0.9403, "step": 3456 }, { "epoch": 10.57, "gpu_memory": 2903643648, "learning_rate": 1.6539808917197452e-05, "loss": 0.9059, "step": 3584 }, { "epoch": 10.95, "gpu_memory": 2903643648, "learning_rate": 1.5877388535031846e-05, "loss": 0.9249, "step": 3712 }, { "epoch": 11.0, "eval_bp": 0.1751321349922995, "eval_counts": [ 748, 240, 115, 63 ], "eval_loss": 1.782728672027588, "eval_precisions": [ 43.13725490196079, 16.736401673640167, 10.008703220191471, 7.11864406779661 ], "eval_ref_len": 4755, "eval_runtime": 54.5903, "eval_samples_per_second": 5.495, "eval_score": 2.6374744638290037, "eval_steps_per_second": 5.495, "eval_sys_len": 1734, "eval_totals": [ 1734, 1434, 1149, 885 ], "gpu_memory": 2903643648, "step": 3729 }, { "epoch": 11.33, "gpu_memory": 2903643648, "learning_rate": 1.5214968152866242e-05, "loss": 0.8587, "step": 3840 }, { "epoch": 11.7, "gpu_memory": 2903643648, "learning_rate": 1.4552547770700635e-05, "loss": 0.8739, "step": 3968 }, { "epoch": 12.0, "eval_bp": 0.1555153512571023, "eval_counts": [ 739, 267, 125, 60 ], "eval_loss": 1.8148356676101685, "eval_precisions": [ 44.46450060168472, 19.60352422907489, 11.671335200746965, 7.462686567164179 ], "eval_ref_len": 4755, "eval_runtime": 53.3032, "eval_samples_per_second": 5.628, "eval_score": 2.581452241674501, "eval_steps_per_second": 5.628, "eval_sys_len": 1662, "eval_totals": [ 1662, 1362, 1071, 804 ], "gpu_memory": 2903643648, "step": 4068 }, { "epoch": 12.08, "gpu_memory": 2903643648, "learning_rate": 1.3890127388535031e-05, "loss": 0.8413, "step": 4096 }, { "epoch": 12.46, "gpu_memory": 2903643648, "learning_rate": 1.3227707006369426e-05, "loss": 0.8195, "step": 4224 }, { "epoch": 12.84, "gpu_memory": 2903643648, "learning_rate": 1.2565286624203822e-05, "loss": 0.823, "step": 4352 }, { "epoch": 13.0, "eval_bp": 0.2187397058134024, "eval_counts": [ 843, 326, 173, 91 ], "eval_loss": 1.8146471977233887, "eval_precisions": [ 44.67408585055644, 20.5419029615627, 13.442113442113442, 9.027777777777779 ], "eval_ref_len": 4755, "eval_runtime": 55.2439, "eval_samples_per_second": 5.43, "eval_score": 3.995892671984357, "eval_steps_per_second": 5.43, "eval_sys_len": 1887, "eval_totals": [ 1887, 1587, 1287, 1008 ], "gpu_memory": 2903643648, "step": 4407 }, { "epoch": 13.22, "gpu_memory": 2903643648, "learning_rate": 1.1902866242038214e-05, "loss": 0.7992, "step": 4480 }, { "epoch": 13.59, "gpu_memory": 2903643648, "learning_rate": 1.124044585987261e-05, "loss": 0.7702, "step": 4608 }, { "epoch": 13.97, "gpu_memory": 2903643648, "learning_rate": 1.0578025477707005e-05, "loss": 0.7824, "step": 4736 }, { "epoch": 14.0, "eval_bp": 0.16524048903893263, "eval_counts": [ 719, 244, 108, 52 ], "eval_loss": 1.8748054504394531, "eval_precisions": [ 42.34393404004712, 17.453505007153076, 9.72972972972973, 6.081871345029239 ], "eval_ref_len": 4755, "eval_runtime": 54.7238, "eval_samples_per_second": 5.482, "eval_score": 2.389568242739576, "eval_steps_per_second": 5.482, "eval_sys_len": 1698, "eval_totals": [ 1698, 1398, 1110, 855 ], "gpu_memory": 2903643648, "step": 4746 }, { "epoch": 14.35, "gpu_memory": 2903643648, "learning_rate": 9.9156050955414e-06, "loss": 0.7425, "step": 4864 }, { "epoch": 14.72, "gpu_memory": 2903643648, "learning_rate": 9.253184713375794e-06, "loss": 0.7501, "step": 4992 }, { "epoch": 15.0, "eval_bp": 0.1953640836862138, "eval_counts": [ 762, 263, 131, 74 ], "eval_loss": 1.9026106595993042, "eval_precisions": [ 42.19269102990033, 17.46347941567065, 10.835401157981803, 7.781282860147213 ], "eval_ref_len": 4755, "eval_runtime": 56.8759, "eval_samples_per_second": 5.275, "eval_score": 3.0843295492719487, "eval_steps_per_second": 5.275, "eval_sys_len": 1806, "eval_totals": [ 1806, 1506, 1209, 951 ], "gpu_memory": 2903643648, "step": 5085 }, { "epoch": 15.1, "gpu_memory": 2903643648, "learning_rate": 8.59076433121019e-06, "loss": 0.7315, "step": 5120 }, { "epoch": 15.48, "gpu_memory": 2903643648, "learning_rate": 7.928343949044585e-06, "loss": 0.7011, "step": 5248 }, { "epoch": 15.86, "gpu_memory": 2903643648, "learning_rate": 7.265923566878981e-06, "loss": 0.7139, "step": 5376 }, { "epoch": 16.0, "eval_bp": 0.23551335586741148, "eval_counts": [ 816, 277, 129, 72 ], "eval_loss": 1.9286922216415405, "eval_precisions": [ 41.97530864197531, 16.849148418491485, 9.57683741648107, 6.70391061452514 ], "eval_ref_len": 4755, "eval_runtime": 58.3566, "eval_samples_per_second": 5.141, "eval_score": 3.4379225352028846, "eval_steps_per_second": 5.141, "eval_sys_len": 1944, "eval_totals": [ 1944, 1644, 1347, 1074 ], "gpu_memory": 2903643648, "step": 5424 }, { "epoch": 16.24, "gpu_memory": 2903643648, "learning_rate": 6.6035031847133755e-06, "loss": 0.689, "step": 5504 }, { "epoch": 16.61, "gpu_memory": 2903643648, "learning_rate": 5.94108280254777e-06, "loss": 0.6788, "step": 5632 }, { "epoch": 16.99, "gpu_memory": 2903643648, "learning_rate": 5.278662420382165e-06, "loss": 0.7053, "step": 5760 }, { "epoch": 17.0, "eval_bp": 0.2934278208519596, "eval_counts": [ 886, 340, 171, 99 ], "eval_loss": 1.9354726076126099, "eval_precisions": [ 41.47940074906367, 18.51851851851852, 11.089494163424124, 7.746478873239437 ], "eval_ref_len": 4755, "eval_runtime": 60.6492, "eval_samples_per_second": 4.946, "eval_score": 4.702891790634525, "eval_steps_per_second": 4.946, "eval_sys_len": 2136, "eval_totals": [ 2136, 1836, 1542, 1278 ], "gpu_memory": 2903643648, "step": 5763 } ], "max_steps": 6780, "num_train_epochs": 20, "total_flos": 1765580040806400.0, "trial_name": null, "trial_params": null }