{ "best_metric": null, "best_model_checkpoint": null, "epoch": 16.99889502762431, "global_step": 5763, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.38, "gpu_memory": 2987030016, "learning_rate": 8.32e-06, "loss": 4.0407, "step": 128 }, { "epoch": 0.75, "gpu_memory": 3076460544, "learning_rate": 1.664e-05, "loss": 2.405, "step": 256 }, { "epoch": 1.0, "eval_bp": 0.035349686560536234, "eval_counts": [ 505, 125, 50, 11 ], "eval_loss": 1.9292821884155273, "eval_precisions": [ 46.118721461187214, 15.723270440251572, 9.861932938856016, 4.471544715447155 ], "eval_ref_len": 4755, "eval_runtime": 44.1807, "eval_samples_per_second": 6.79, "eval_score": 0.47271078280719403, "eval_steps_per_second": 6.79, "eval_sys_len": 1095, "eval_totals": [ 1095, 795, 507, 246 ], "gpu_memory": 3076460544, "step": 339 }, { "epoch": 1.13, "gpu_memory": 3076460544, "learning_rate": 2.4959999999999998e-05, "loss": 2.0089, "step": 384 }, { "epoch": 1.51, "gpu_memory": 3076460544, "learning_rate": 3.2437898089171974e-05, "loss": 1.8155, "step": 512 }, { "epoch": 1.89, "gpu_memory": 3076460544, "learning_rate": 3.1775477707006364e-05, "loss": 1.7234, "step": 640 }, { "epoch": 2.0, "eval_bp": 0.0840891954437523, "eval_counts": [ 492, 189, 85, 29 ], "eval_loss": 1.6681220531463623, "eval_precisions": [ 35.96491228070175, 17.696629213483146, 10.303030303030303, 4.833333333333333 ], "eval_ref_len": 4755, "eval_runtime": 53.1682, "eval_samples_per_second": 5.642, "eval_score": 1.1219810390322362, "eval_steps_per_second": 5.642, "eval_sys_len": 1368, "eval_totals": [ 1368, 1068, 825, 600 ], "gpu_memory": 3076460544, "step": 678 }, { "epoch": 2.27, "gpu_memory": 3076460544, "learning_rate": 3.111305732484076e-05, "loss": 1.6058, "step": 768 }, { "epoch": 2.64, "gpu_memory": 3076460544, "learning_rate": 3.0450636942675155e-05, "loss": 1.5189, "step": 896 }, { "epoch": 3.0, "eval_bp": 0.09192776836698148, "eval_counts": [ 571, 192, 93, 40 ], "eval_loss": 1.5985139608383179, "eval_precisions": [ 40.66951566951567, 17.391304347826086, 11.03202846975089, 6.734006734006734 ], "eval_ref_len": 4755, "eval_runtime": 44.5545, "eval_samples_per_second": 6.733, "eval_score": 1.391807704814939, "eval_steps_per_second": 6.733, "eval_sys_len": 1404, "eval_totals": [ 1404, 1104, 843, 594 ], "gpu_memory": 3076460544, "step": 1017 }, { "epoch": 3.02, "gpu_memory": 3076460544, "learning_rate": 2.9788216560509553e-05, "loss": 1.4885, "step": 1024 }, { "epoch": 3.4, "gpu_memory": 3076460544, "learning_rate": 2.9125796178343946e-05, "loss": 1.334, "step": 1152 }, { "epoch": 3.77, "gpu_memory": 3076460544, "learning_rate": 2.8463375796178344e-05, "loss": 1.3861, "step": 1280 }, { "epoch": 4.0, "eval_bp": 0.034513967404432855, "eval_counts": [ 432, 173, 84, 35 ], "eval_loss": 1.6043497323989868, "eval_precisions": [ 39.66942148760331, 21.926489226869457, 16.184971098265898, 9.48509485094851 ], "eval_ref_len": 4755, "eval_runtime": 42.8527, "eval_samples_per_second": 7.001, "eval_score": 0.6597653875525311, "eval_steps_per_second": 7.001, "eval_sys_len": 1089, "eval_totals": [ 1089, 789, 519, 369 ], "gpu_memory": 3076460544, "step": 1356 }, { "epoch": 4.15, "gpu_memory": 3076460544, "learning_rate": 2.7800955414012737e-05, "loss": 1.3367, "step": 1408 }, { "epoch": 4.53, "gpu_memory": 3076460544, "learning_rate": 2.713853503184713e-05, "loss": 1.2828, "step": 1536 }, { "epoch": 4.91, "gpu_memory": 3076460544, "learning_rate": 2.647611464968153e-05, "loss": 1.2571, "step": 1664 }, { "epoch": 5.0, "eval_bp": 0.17929973112718744, "eval_counts": [ 671, 230, 102, 43 ], "eval_loss": 1.5908681154251099, "eval_precisions": [ 38.36477987421384, 15.873015873015873, 8.695652173913043, 4.699453551912568 ], "eval_ref_len": 4755, "eval_runtime": 55.6404, "eval_samples_per_second": 5.392, "eval_score": 2.2519827467510987, "eval_steps_per_second": 5.392, "eval_sys_len": 1749, "eval_totals": [ 1749, 1449, 1173, 915 ], "gpu_memory": 3076460544, "step": 1695 }, { "epoch": 5.29, "gpu_memory": 3076460544, "learning_rate": 2.5813694267515922e-05, "loss": 1.2035, "step": 1792 }, { "epoch": 5.66, "gpu_memory": 3076460544, "learning_rate": 2.515127388535032e-05, "loss": 1.183, "step": 1920 }, { "epoch": 6.0, "eval_bp": 0.07050485313640832, "eval_counts": [ 615, 257, 141, 80 ], "eval_loss": 1.5943706035614014, "eval_precisions": [ 47.235023041474655, 25.64870259481038, 19.502074688796682, 14.109347442680775 ], "eval_ref_len": 4755, "eval_runtime": 47.1825, "eval_samples_per_second": 6.358, "eval_score": 1.6941362350992444, "eval_steps_per_second": 6.358, "eval_sys_len": 1302, "eval_totals": [ 1302, 1002, 723, 567 ], "gpu_memory": 3076460544, "step": 2034 }, { "epoch": 6.04, "gpu_memory": 3076460544, "learning_rate": 2.4488853503184713e-05, "loss": 1.1964, "step": 2048 }, { "epoch": 6.42, "gpu_memory": 3076460544, "learning_rate": 2.3826433121019104e-05, "loss": 1.1073, "step": 2176 }, { "epoch": 6.8, "gpu_memory": 3076460544, "learning_rate": 2.31640127388535e-05, "loss": 1.1316, "step": 2304 }, { "epoch": 7.0, "eval_bp": 0.10421315891869368, "eval_counts": [ 649, 197, 79, 22 ], "eval_loss": 1.6070951223373413, "eval_precisions": [ 44.51303155006859, 17.012089810017272, 9.111880046136102, 3.559870550161812 ], "eval_ref_len": 4755, "eval_runtime": 47.6479, "eval_samples_per_second": 6.296, "eval_score": 1.3046509061748794, "eval_steps_per_second": 6.296, "eval_sys_len": 1458, "eval_totals": [ 1458, 1158, 867, 618 ], "gpu_memory": 3076460544, "step": 2373 }, { "epoch": 7.17, "gpu_memory": 3076460544, "learning_rate": 2.2501592356687895e-05, "loss": 1.0398, "step": 2432 }, { "epoch": 7.55, "gpu_memory": 3076460544, "learning_rate": 2.183917197452229e-05, "loss": 1.0349, "step": 2560 }, { "epoch": 7.93, "gpu_memory": 3076460544, "learning_rate": 2.1176751592356686e-05, "loss": 1.0816, "step": 2688 }, { "epoch": 8.0, "eval_bp": 0.21001389512353258, "eval_counts": [ 846, 344, 187, 105 ], "eval_loss": 1.6298103332519531, "eval_precisions": [ 45.55735056542811, 22.093770070648684, 14.597970335675253, 10.294117647058824 ], "eval_ref_len": 4755, "eval_runtime": 54.6716, "eval_samples_per_second": 5.487, "eval_score": 4.141670104799348, "eval_steps_per_second": 5.487, "eval_sys_len": 1857, "eval_totals": [ 1857, 1557, 1281, 1020 ], "gpu_memory": 3076460544, "step": 2712 }, { "epoch": 8.31, "gpu_memory": 3076460544, "learning_rate": 2.051433121019108e-05, "loss": 0.987, "step": 2816 }, { "epoch": 8.68, "gpu_memory": 3076460544, "learning_rate": 1.9851910828025477e-05, "loss": 0.9829, "step": 2944 }, { "epoch": 9.0, "eval_bp": 0.06525766524199453, "eval_counts": [ 577, 216, 100, 37 ], "eval_loss": 1.6366333961486816, "eval_precisions": [ 45.254901960784316, 22.153846153846153, 14.367816091954023, 7.297830374753452 ], "eval_ref_len": 4755, "eval_runtime": 49.3567, "eval_samples_per_second": 6.078, "eval_score": 1.1750500193614282, "eval_steps_per_second": 6.078, "eval_sys_len": 1275, "eval_totals": [ 1275, 975, 696, 507 ], "gpu_memory": 3076460544, "step": 3051 }, { "epoch": 9.06, "gpu_memory": 3076460544, "learning_rate": 1.918949044585987e-05, "loss": 1.003, "step": 3072 }, { "epoch": 9.44, "gpu_memory": 3076460544, "learning_rate": 1.8527070063694264e-05, "loss": 0.9337, "step": 3200 }, { "epoch": 9.82, "gpu_memory": 3076460544, "learning_rate": 1.786464968152866e-05, "loss": 0.9325, "step": 3328 }, { "epoch": 10.0, "eval_bp": 0.16851984622310243, "eval_counts": [ 667, 248, 121, 62 ], "eval_loss": 1.67235267162323, "eval_precisions": [ 39.005847953216374, 17.588652482269502, 10.503472222222221, 6.68824163969795 ], "eval_ref_len": 4755, "eval_runtime": 50.0923, "eval_samples_per_second": 5.989, "eval_score": 2.4969097127652855, "eval_steps_per_second": 5.989, "eval_sys_len": 1710, "eval_totals": [ 1710, 1410, 1152, 927 ], "gpu_memory": 3076460544, "step": 3390 }, { "epoch": 10.19, "gpu_memory": 3076460544, "learning_rate": 1.7202229299363055e-05, "loss": 0.9075, "step": 3456 }, { "epoch": 10.57, "gpu_memory": 3076460544, "learning_rate": 1.6539808917197452e-05, "loss": 0.8753, "step": 3584 }, { "epoch": 10.95, "gpu_memory": 3076460544, "learning_rate": 1.5877388535031846e-05, "loss": 0.9098, "step": 3712 }, { "epoch": 11.0, "eval_bp": 0.1483387334695538, "eval_counts": [ 735, 268, 134, 67 ], "eval_loss": 1.6972090005874634, "eval_precisions": [ 44.95412844036697, 20.074906367041198, 12.725546058879392, 8.18070818070818 ], "eval_ref_len": 4755, "eval_runtime": 46.9235, "eval_samples_per_second": 6.393, "eval_score": 2.5970312545681904, "eval_steps_per_second": 6.393, "eval_sys_len": 1635, "eval_totals": [ 1635, 1335, 1053, 819 ], "gpu_memory": 3076460544, "step": 3729 }, { "epoch": 11.33, "gpu_memory": 3076460544, "learning_rate": 1.5214968152866242e-05, "loss": 0.839, "step": 3840 }, { "epoch": 11.7, "gpu_memory": 3076460544, "learning_rate": 1.4552547770700635e-05, "loss": 0.8643, "step": 3968 }, { "epoch": 12.0, "eval_bp": 0.1320190352563076, "eval_counts": [ 715, 285, 143, 70 ], "eval_loss": 1.713928461074829, "eval_precisions": [ 45.48346055979644, 22.40566037735849, 14.357429718875501, 9.25925925925926 ], "eval_ref_len": 4755, "eval_runtime": 46.2792, "eval_samples_per_second": 6.482, "eval_score": 2.532809945547002, "eval_steps_per_second": 6.482, "eval_sys_len": 1572, "eval_totals": [ 1572, 1272, 996, 756 ], "gpu_memory": 3076460544, "step": 4068 }, { "epoch": 12.08, "gpu_memory": 3076460544, "learning_rate": 1.3890127388535031e-05, "loss": 0.8264, "step": 4096 }, { "epoch": 12.46, "gpu_memory": 3076460544, "learning_rate": 1.3227707006369426e-05, "loss": 0.8008, "step": 4224 }, { "epoch": 12.84, "gpu_memory": 3076460544, "learning_rate": 1.2565286624203822e-05, "loss": 0.7963, "step": 4352 }, { "epoch": 13.0, "eval_bp": 0.18517745860640325, "eval_counts": [ 782, 310, 160, 79 ], "eval_loss": 1.7276182174682617, "eval_precisions": [ 44.18079096045198, 21.08843537414966, 13.43408900083963, 8.44017094017094 ], "eval_ref_len": 4755, "eval_runtime": 49.531, "eval_samples_per_second": 6.057, "eval_score": 3.3384697611529055, "eval_steps_per_second": 6.057, "eval_sys_len": 1770, "eval_totals": [ 1770, 1470, 1191, 936 ], "gpu_memory": 3076460544, "step": 4407 }, { "epoch": 13.22, "gpu_memory": 3076460544, "learning_rate": 1.1902866242038214e-05, "loss": 0.791, "step": 4480 }, { "epoch": 13.59, "gpu_memory": 3076460544, "learning_rate": 1.124044585987261e-05, "loss": 0.7591, "step": 4608 }, { "epoch": 13.97, "gpu_memory": 3076460544, "learning_rate": 1.0578025477707005e-05, "loss": 0.7651, "step": 4736 }, { "epoch": 14.0, "eval_bp": 0.17762954994257873, "eval_counts": [ 784, 310, 160, 81 ], "eval_loss": 1.788110375404358, "eval_precisions": [ 44.97991967871486, 21.48302148302148, 13.605442176870747, 8.653846153846153 ], "eval_ref_len": 4755, "eval_runtime": 48.2995, "eval_samples_per_second": 6.211, "eval_score": 3.262302153360586, "eval_steps_per_second": 6.211, "eval_sys_len": 1743, "eval_totals": [ 1743, 1443, 1176, 936 ], "gpu_memory": 3076460544, "step": 4746 }, { "epoch": 14.35, "gpu_memory": 3076460544, "learning_rate": 9.9156050955414e-06, "loss": 0.7389, "step": 4864 }, { "epoch": 14.72, "gpu_memory": 3076460544, "learning_rate": 9.253184713375794e-06, "loss": 0.7292, "step": 4992 }, { "epoch": 15.0, "eval_bp": 0.19451009506119815, "eval_counts": [ 756, 286, 139, 66 ], "eval_loss": 1.8334678411483765, "eval_precisions": [ 41.930116472545755, 19.028609447771125, 11.356209150326798, 6.790123456790123 ], "eval_ref_len": 4755, "eval_runtime": 50.9389, "eval_samples_per_second": 5.889, "eval_score": 3.063396343878355, "eval_steps_per_second": 5.889, "eval_sys_len": 1803, "eval_totals": [ 1803, 1503, 1224, 972 ], "gpu_memory": 3076460544, "step": 5085 }, { "epoch": 15.1, "gpu_memory": 3076460544, "learning_rate": 8.59076433121019e-06, "loss": 0.7051, "step": 5120 }, { "epoch": 15.48, "gpu_memory": 3076460544, "learning_rate": 7.928343949044585e-06, "loss": 0.6872, "step": 5248 }, { "epoch": 15.86, "gpu_memory": 3076460544, "learning_rate": 7.265923566878981e-06, "loss": 0.6935, "step": 5376 }, { "epoch": 16.0, "eval_bp": 0.2204937574447589, "eval_counts": [ 792, 311, 160, 80 ], "eval_loss": 1.8358988761901855, "eval_precisions": [ 41.83835182250396, 19.522912743251727, 12.121212121212121, 7.469654528478058 ], "eval_ref_len": 4755, "eval_runtime": 51.7295, "eval_samples_per_second": 5.799, "eval_score": 3.6361160482722528, "eval_steps_per_second": 5.799, "eval_sys_len": 1893, "eval_totals": [ 1893, 1593, 1320, 1071 ], "gpu_memory": 3076460544, "step": 5424 }, { "epoch": 16.24, "gpu_memory": 3076460544, "learning_rate": 6.6035031847133755e-06, "loss": 0.6808, "step": 5504 }, { "epoch": 16.61, "gpu_memory": 3076460544, "learning_rate": 5.94108280254777e-06, "loss": 0.6649, "step": 5632 }, { "epoch": 16.99, "gpu_memory": 3076460544, "learning_rate": 5.278662420382165e-06, "loss": 0.6902, "step": 5760 }, { "epoch": 17.0, "eval_bp": 0.27059488659440983, "eval_counts": [ 875, 346, 196, 113 ], "eval_loss": 1.8474984169006348, "eval_precisions": [ 42.45511887433285, 19.64792731402612, 13.198653198653199, 9.254709254709255 ], "eval_ref_len": 4755, "eval_runtime": 53.1106, "eval_samples_per_second": 5.649, "eval_score": 4.834531406134382, "eval_steps_per_second": 5.649, "eval_sys_len": 2061, "eval_totals": [ 2061, 1761, 1485, 1221 ], "gpu_memory": 3076460544, "step": 5763 } ], "max_steps": 6780, "num_train_epochs": 20, "total_flos": 5005888091043840.0, "trial_name": null, "trial_params": null }