{ "best_metric": null, "best_model_checkpoint": null, "epoch": 24.91103202846975, "eval_steps": 500, "global_step": 14000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.89, "learning_rate": 1.9288256227758007e-05, "loss": 0.7021, "step": 500 }, { "epoch": 1.0, "eval_bp": 0.4478803932550057, "eval_counts": [ 951, 851, 761, 672 ], "eval_loss": 0.1726560890674591, "eval_precisions": [ 95.96367305751765, 92.19934994582881, 89.00584795321637, 85.38754764930114 ], "eval_ref_len": 1787, "eval_runtime": 18.1569, "eval_samples_per_second": 3.745, "eval_score": 40.55776382580726, "eval_steps_per_second": 1.873, "eval_sys_len": 991, "eval_totals": [ 991, 923, 855, 787 ], "step": 562 }, { "epoch": 1.78, "learning_rate": 1.8576512455516017e-05, "loss": 0.2854, "step": 1000 }, { "epoch": 2.0, "eval_bp": 0.441355602158059, "eval_counts": [ 962, 883, 802, 721 ], "eval_loss": 0.11405563354492188, "eval_precisions": [ 97.86368260427264, 96.50273224043715, 94.68713105076742, 92.55455712451861 ], "eval_ref_len": 1787, "eval_runtime": 16.4762, "eval_samples_per_second": 4.127, "eval_score": 42.09699789518687, "eval_steps_per_second": 2.064, "eval_sys_len": 983, "eval_totals": [ 983, 915, 847, 779 ], "step": 1124 }, { "epoch": 2.67, "learning_rate": 1.7864768683274022e-05, "loss": 0.1956, "step": 1500 }, { "epoch": 3.0, "eval_bp": 0.44380356964488166, "eval_counts": [ 965, 886, 806, 726 ], "eval_loss": 0.10203403234481812, "eval_precisions": [ 97.87018255578093, 96.51416122004358, 94.82352941176471, 92.8388746803069 ], "eval_ref_len": 1787, "eval_runtime": 16.5425, "eval_samples_per_second": 4.111, "eval_score": 42.380165345955355, "eval_steps_per_second": 2.055, "eval_sys_len": 986, "eval_totals": [ 986, 918, 850, 782 ], "step": 1686 }, { "epoch": 3.56, "learning_rate": 1.715302491103203e-05, "loss": 0.1441, "step": 2000 }, { "epoch": 4.0, "eval_bp": 0.437272650606809, "eval_counts": [ 966, 894, 816, 739 ], "eval_loss": 0.05576588958501816, "eval_precisions": [ 98.77300613496932, 98.24175824175825, 96.91211401425178, 95.4780361757106 ], "eval_ref_len": 1787, "eval_runtime": 16.5385, "eval_samples_per_second": 4.112, "eval_score": 42.565359021286106, "eval_steps_per_second": 2.056, "eval_sys_len": 978, "eval_totals": [ 978, 910, 842, 774 ], "step": 2248 }, { "epoch": 4.45, "learning_rate": 1.6441281138790037e-05, "loss": 0.1082, "step": 2500 }, { "epoch": 5.0, "eval_bp": 0.425821753267709, "eval_counts": [ 956, 882, 809, 736 ], "eval_loss": 0.0341346338391304, "eval_precisions": [ 99.1701244813278, 98.4375, 97.70531400966183, 96.84210526315789 ], "eval_ref_len": 1787, "eval_runtime": 16.4383, "eval_samples_per_second": 4.137, "eval_score": 41.745417030738814, "eval_steps_per_second": 2.068, "eval_sys_len": 964, "eval_totals": [ 964, 896, 828, 760 ], "step": 2810 }, { "epoch": 5.34, "learning_rate": 1.5729537366548043e-05, "loss": 0.0942, "step": 3000 }, { "epoch": 6.0, "eval_bp": 0.4356384499188918, "eval_counts": [ 973, 899, 824, 749 ], "eval_loss": 0.05856137350201607, "eval_precisions": [ 99.69262295081967, 99.00881057268722, 98.0952380952381, 97.02072538860104 ], "eval_ref_len": 1787, "eval_runtime": 16.3313, "eval_samples_per_second": 4.164, "eval_score": 42.888270294493864, "eval_steps_per_second": 2.082, "eval_sys_len": 976, "eval_totals": [ 976, 908, 840, 772 ], "step": 3372 }, { "epoch": 6.23, "learning_rate": 1.5017793594306052e-05, "loss": 0.0713, "step": 3500 }, { "epoch": 7.0, "eval_bp": 0.42745918009113226, "eval_counts": [ 964, 894, 824, 754 ], "eval_loss": 0.02425399236381054, "eval_precisions": [ 99.79296066252589, 99.55456570155901, 99.27710843373494, 98.9501312335958 ], "eval_ref_len": 1787, "eval_runtime": 16.4255, "eval_samples_per_second": 4.14, "eval_score": 42.486533114975146, "eval_steps_per_second": 2.07, "eval_sys_len": 966, "eval_totals": [ 966, 898, 830, 762 ], "step": 3934 }, { "epoch": 7.12, "learning_rate": 1.4306049822064058e-05, "loss": 0.0599, "step": 4000 }, { "epoch": 8.0, "eval_bp": 0.43073249568232586, "eval_counts": [ 961, 890, 820, 751 ], "eval_loss": 0.015419703908264637, "eval_precisions": [ 99.0721649484536, 98.66962305986696, 98.32134292565948, 98.04177545691905 ], "eval_ref_len": 1787, "eval_runtime": 16.187, "eval_samples_per_second": 4.201, "eval_score": 42.438122327030186, "eval_steps_per_second": 2.1, "eval_sys_len": 970, "eval_totals": [ 970, 902, 834, 766 ], "step": 4496 }, { "epoch": 8.01, "learning_rate": 1.3594306049822066e-05, "loss": 0.048, "step": 4500 }, { "epoch": 8.9, "learning_rate": 1.2882562277580073e-05, "loss": 0.0392, "step": 5000 }, { "epoch": 9.0, "eval_bp": 0.4250028527550296, "eval_counts": [ 959, 889, 818, 747 ], "eval_loss": 0.019724518060684204, "eval_precisions": [ 99.5846313603323, 99.32960893854748, 98.9117291414752, 98.41897233201581 ], "eval_ref_len": 1787, "eval_runtime": 16.8132, "eval_samples_per_second": 4.044, "eval_score": 42.10088852964188, "eval_steps_per_second": 2.022, "eval_sys_len": 963, "eval_totals": [ 963, 895, 827, 759 ], "step": 5058 }, { "epoch": 9.79, "learning_rate": 1.217081850533808e-05, "loss": 0.0319, "step": 5500 }, { "epoch": 10.0, "eval_bp": 0.42909609813393135, "eval_counts": [ 966, 898, 830, 762 ], "eval_loss": 0.005858541466295719, "eval_precisions": [ 99.79338842975207, 99.77777777777777, 99.75961538461539, 99.73821989528795 ], "eval_ref_len": 1787, "eval_runtime": 16.1653, "eval_samples_per_second": 4.207, "eval_score": 42.809736944779694, "eval_steps_per_second": 2.103, "eval_sys_len": 968, "eval_totals": [ 968, 900, 832, 764 ], "step": 5620 }, { "epoch": 10.68, "learning_rate": 1.1459074733096086e-05, "loss": 0.026, "step": 6000 }, { "epoch": 11.0, "eval_bp": 0.42909609813393135, "eval_counts": [ 966, 898, 829, 760 ], "eval_loss": 0.010834704153239727, "eval_precisions": [ 99.79338842975207, 99.77777777777777, 99.63942307692308, 99.47643979057591 ], "eval_ref_len": 1787, "eval_runtime": 16.4611, "eval_samples_per_second": 4.131, "eval_score": 42.768727019910926, "eval_steps_per_second": 2.065, "eval_sys_len": 968, "eval_totals": [ 968, 900, 832, 764 ], "step": 6182 }, { "epoch": 11.57, "learning_rate": 1.0747330960854094e-05, "loss": 0.025, "step": 6500 }, { "epoch": 12.0, "eval_bp": 0.42909609813393135, "eval_counts": [ 966, 898, 830, 762 ], "eval_loss": 0.0013562627136707306, "eval_precisions": [ 99.79338842975207, 99.77777777777777, 99.75961538461539, 99.73821989528795 ], "eval_ref_len": 1787, "eval_runtime": 16.1168, "eval_samples_per_second": 4.219, "eval_score": 42.809736944779694, "eval_steps_per_second": 2.11, "eval_sys_len": 968, "eval_totals": [ 968, 900, 832, 764 ], "step": 6744 }, { "epoch": 12.46, "learning_rate": 1.0035587188612101e-05, "loss": 0.0182, "step": 7000 }, { "epoch": 13.0, "eval_bp": 0.42909609813393135, "eval_counts": [ 966, 898, 830, 762 ], "eval_loss": 0.0013860436156392097, "eval_precisions": [ 99.79338842975207, 99.77777777777777, 99.75961538461539, 99.73821989528795 ], "eval_ref_len": 1787, "eval_runtime": 16.6416, "eval_samples_per_second": 4.086, "eval_score": 42.809736944779694, "eval_steps_per_second": 2.043, "eval_sys_len": 968, "eval_totals": [ 968, 900, 832, 764 ], "step": 7306 }, { "epoch": 13.35, "learning_rate": 9.323843416370107e-06, "loss": 0.0148, "step": 7500 }, { "epoch": 14.0, "eval_bp": 0.4315504956415897, "eval_counts": [ 963, 894, 825, 756 ], "eval_loss": 0.004028095863759518, "eval_precisions": [ 99.17610710607622, 99.00332225913621, 98.80239520958084, 98.56584093872229 ], "eval_ref_len": 1787, "eval_runtime": 16.2486, "eval_samples_per_second": 4.185, "eval_score": 42.674584599098594, "eval_steps_per_second": 2.092, "eval_sys_len": 971, "eval_totals": [ 971, 903, 835, 767 ], "step": 7868 }, { "epoch": 14.23, "learning_rate": 8.612099644128115e-06, "loss": 0.0149, "step": 8000 }, { "epoch": 15.0, "eval_bp": 0.4299143626976017, "eval_counts": [ 967, 899, 830, 761 ], "eval_loss": 0.004267631098628044, "eval_precisions": [ 99.79360165118679, 99.7780244173141, 99.63985594237695, 99.47712418300654 ], "eval_ref_len": 1787, "eval_runtime": 16.2443, "eval_samples_per_second": 4.186, "eval_score": 42.85045442963625, "eval_steps_per_second": 2.093, "eval_sys_len": 969, "eval_totals": [ 969, 901, 833, 765 ], "step": 8430 }, { "epoch": 15.12, "learning_rate": 7.900355871886122e-06, "loss": 0.0144, "step": 8500 }, { "epoch": 16.0, "eval_bp": 0.42909609813393135, "eval_counts": [ 966, 898, 830, 762 ], "eval_loss": 0.00286501320078969, "eval_precisions": [ 99.79338842975207, 99.77777777777777, 99.75961538461539, 99.73821989528795 ], "eval_ref_len": 1787, "eval_runtime": 16.5144, "eval_samples_per_second": 4.118, "eval_score": 42.809736944779694, "eval_steps_per_second": 2.059, "eval_sys_len": 968, "eval_totals": [ 968, 900, 832, 764 ], "step": 8992 }, { "epoch": 16.01, "learning_rate": 7.188612099644129e-06, "loss": 0.0112, "step": 9000 }, { "epoch": 16.9, "learning_rate": 6.476868327402136e-06, "loss": 0.0098, "step": 9500 }, { "epoch": 17.0, "eval_bp": 0.42909609813393135, "eval_counts": [ 966, 898, 830, 762 ], "eval_loss": 0.0009978804737329483, "eval_precisions": [ 99.79338842975207, 99.77777777777777, 99.75961538461539, 99.73821989528795 ], "eval_ref_len": 1787, "eval_runtime": 16.1028, "eval_samples_per_second": 4.223, "eval_score": 42.809736944779694, "eval_steps_per_second": 2.111, "eval_sys_len": 968, "eval_totals": [ 968, 900, 832, 764 ], "step": 9554 }, { "epoch": 17.79, "learning_rate": 5.765124555160143e-06, "loss": 0.0089, "step": 10000 }, { "epoch": 18.0, "eval_bp": 0.42909609813393135, "eval_counts": [ 966, 898, 830, 762 ], "eval_loss": 0.00037646759301424026, "eval_precisions": [ 99.79338842975207, 99.77777777777777, 99.75961538461539, 99.73821989528795 ], "eval_ref_len": 1787, "eval_runtime": 16.4684, "eval_samples_per_second": 4.129, "eval_score": 42.809736944779694, "eval_steps_per_second": 2.065, "eval_sys_len": 968, "eval_totals": [ 968, 900, 832, 764 ], "step": 10116 }, { "epoch": 18.68, "learning_rate": 5.05338078291815e-06, "loss": 0.0075, "step": 10500 }, { "epoch": 19.0, "eval_bp": 0.42909609813393135, "eval_counts": [ 966, 898, 830, 762 ], "eval_loss": 0.00018596854351926595, "eval_precisions": [ 99.79338842975207, 99.77777777777777, 99.75961538461539, 99.73821989528795 ], "eval_ref_len": 1787, "eval_runtime": 16.3451, "eval_samples_per_second": 4.16, "eval_score": 42.809736944779694, "eval_steps_per_second": 2.08, "eval_sys_len": 968, "eval_totals": [ 968, 900, 832, 764 ], "step": 10678 }, { "epoch": 19.57, "learning_rate": 4.341637010676157e-06, "loss": 0.0068, "step": 11000 }, { "epoch": 20.0, "eval_bp": 0.42909609813393135, "eval_counts": [ 966, 898, 830, 762 ], "eval_loss": 7.006935447861906e-06, "eval_precisions": [ 99.79338842975207, 99.77777777777777, 99.75961538461539, 99.73821989528795 ], "eval_ref_len": 1787, "eval_runtime": 16.0905, "eval_samples_per_second": 4.226, "eval_score": 42.809736944779694, "eval_steps_per_second": 2.113, "eval_sys_len": 968, "eval_totals": [ 968, 900, 832, 764 ], "step": 11240 }, { "epoch": 20.46, "learning_rate": 3.629893238434164e-06, "loss": 0.0056, "step": 11500 }, { "epoch": 21.0, "eval_bp": 0.42909609813393135, "eval_counts": [ 966, 898, 830, 762 ], "eval_loss": 1.5906211046967655e-05, "eval_precisions": [ 99.79338842975207, 99.77777777777777, 99.75961538461539, 99.73821989528795 ], "eval_ref_len": 1787, "eval_runtime": 16.3937, "eval_samples_per_second": 4.148, "eval_score": 42.809736944779694, "eval_steps_per_second": 2.074, "eval_sys_len": 968, "eval_totals": [ 968, 900, 832, 764 ], "step": 11802 }, { "epoch": 21.35, "learning_rate": 2.918149466192171e-06, "loss": 0.0041, "step": 12000 }, { "epoch": 22.0, "eval_bp": 0.42909609813393135, "eval_counts": [ 966, 898, 830, 762 ], "eval_loss": 1.3131144442013465e-05, "eval_precisions": [ 99.79338842975207, 99.77777777777777, 99.75961538461539, 99.73821989528795 ], "eval_ref_len": 1787, "eval_runtime": 16.4474, "eval_samples_per_second": 4.134, "eval_score": 42.809736944779694, "eval_steps_per_second": 2.067, "eval_sys_len": 968, "eval_totals": [ 968, 900, 832, 764 ], "step": 12364 }, { "epoch": 22.24, "learning_rate": 2.2064056939501782e-06, "loss": 0.0033, "step": 12500 }, { "epoch": 23.0, "eval_bp": 0.42909609813393135, "eval_counts": [ 966, 898, 830, 762 ], "eval_loss": 5.197064183448674e-06, "eval_precisions": [ 99.79338842975207, 99.77777777777777, 99.75961538461539, 99.73821989528795 ], "eval_ref_len": 1787, "eval_runtime": 16.1321, "eval_samples_per_second": 4.215, "eval_score": 42.809736944779694, "eval_steps_per_second": 2.108, "eval_sys_len": 968, "eval_totals": [ 968, 900, 832, 764 ], "step": 12926 }, { "epoch": 23.13, "learning_rate": 1.494661921708185e-06, "loss": 0.0033, "step": 13000 }, { "epoch": 24.0, "eval_bp": 0.42909609813393135, "eval_counts": [ 966, 898, 830, 762 ], "eval_loss": 2.9162031296436908e-06, "eval_precisions": [ 99.79338842975207, 99.77777777777777, 99.75961538461539, 99.73821989528795 ], "eval_ref_len": 1787, "eval_runtime": 16.4335, "eval_samples_per_second": 4.138, "eval_score": 42.809736944779694, "eval_steps_per_second": 2.069, "eval_sys_len": 968, "eval_totals": [ 968, 900, 832, 764 ], "step": 13488 }, { "epoch": 24.02, "learning_rate": 7.829181494661923e-07, "loss": 0.0025, "step": 13500 }, { "epoch": 24.91, "learning_rate": 7.117437722419929e-08, "loss": 0.0023, "step": 14000 } ], "logging_steps": 500, "max_steps": 14050, "num_input_tokens_seen": 0, "num_train_epochs": 25, "save_steps": 500, "total_flos": 2476170265006080.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }