{ "best_metric": null, "best_model_checkpoint": null, "epoch": 68.0, "eval_steps": 500, "global_step": 3094, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.79, "learning_rate": 1.5555555555555555e-05, "loss": 2.3054, "step": 36 }, { "epoch": 1.58, "learning_rate": 3.155555555555556e-05, "loss": 2.1949, "step": 72 }, { "epoch": 2.37, "learning_rate": 4.755555555555556e-05, "loss": 1.978, "step": 108 }, { "epoch": 3.16, "learning_rate": 6.355555555555556e-05, "loss": 1.9161, "step": 144 }, { "epoch": 3.96, "learning_rate": 7.955555555555556e-05, "loss": 1.8945, "step": 180 }, { "epoch": 4.75, "learning_rate": 9.555555555555557e-05, "loss": 1.8682, "step": 216 }, { "epoch": 5.54, "learning_rate": 0.00011155555555555556, "loss": 1.8492, "step": 252 }, { "epoch": 6.33, "learning_rate": 0.00012755555555555556, "loss": 1.8337, "step": 288 }, { "epoch": 7.12, "learning_rate": 0.0001431111111111111, "loss": 1.8144, "step": 324 }, { "epoch": 7.91, "learning_rate": 0.00015911111111111112, "loss": 1.7907, "step": 360 }, { "epoch": 8.7, "learning_rate": 0.00017511111111111113, "loss": 1.7382, "step": 396 }, { "epoch": 9.49, "learning_rate": 0.00019111111111111114, "loss": 1.6991, "step": 432 }, { "epoch": 10.29, "learning_rate": 0.00019925925925925927, "loss": 1.6405, "step": 468 }, { "epoch": 11.08, "learning_rate": 0.00019748148148148148, "loss": 1.5929, "step": 504 }, { "epoch": 11.87, "learning_rate": 0.0001957037037037037, "loss": 1.5414, "step": 540 }, { "epoch": 12.66, "learning_rate": 0.00019392592592592592, "loss": 1.447, "step": 576 }, { "epoch": 13.45, "learning_rate": 0.00019214814814814816, "loss": 1.3947, "step": 612 }, { "epoch": 14.24, "learning_rate": 0.00019037037037037037, "loss": 1.3655, "step": 648 }, { "epoch": 15.03, "learning_rate": 0.0001885925925925926, "loss": 1.2873, "step": 684 }, { "epoch": 15.82, "learning_rate": 0.0001868148148148148, "loss": 1.2198, "step": 720 }, { "epoch": 16.62, "learning_rate": 0.00018503703703703705, "loss": 1.1512, "step": 756 }, { "epoch": 17.41, "learning_rate": 0.00018325925925925926, "loss": 1.1286, "step": 792 }, { "epoch": 18.2, "learning_rate": 0.0001814814814814815, "loss": 1.0709, "step": 828 }, { "epoch": 18.99, "learning_rate": 0.0001797037037037037, "loss": 1.0442, "step": 864 }, { "epoch": 19.78, "learning_rate": 0.00017792592592592594, "loss": 0.971, "step": 900 }, { "epoch": 20.57, "learning_rate": 0.00017614814814814815, "loss": 0.9412, "step": 936 }, { "epoch": 21.36, "learning_rate": 0.00017437037037037039, "loss": 0.9084, "step": 972 }, { "epoch": 22.15, "learning_rate": 0.0001725925925925926, "loss": 0.885, "step": 1008 }, { "epoch": 22.95, "learning_rate": 0.00017081481481481483, "loss": 0.844, "step": 1044 }, { "epoch": 23.74, "learning_rate": 0.00016903703703703704, "loss": 0.7975, "step": 1080 }, { "epoch": 24.53, "learning_rate": 0.00016725925925925928, "loss": 0.7786, "step": 1116 }, { "epoch": 25.32, "learning_rate": 0.00016548148148148149, "loss": 0.7465, "step": 1152 }, { "epoch": 26.11, "learning_rate": 0.00016370370370370372, "loss": 0.7311, "step": 1188 }, { "epoch": 26.9, "learning_rate": 0.00016192592592592593, "loss": 0.6955, "step": 1224 }, { "epoch": 27.69, "learning_rate": 0.00016014814814814817, "loss": 0.6656, "step": 1260 }, { "epoch": 28.48, "learning_rate": 0.00015837037037037038, "loss": 0.6507, "step": 1296 }, { "epoch": 29.27, "learning_rate": 0.0001565925925925926, "loss": 0.6336, "step": 1332 }, { "epoch": 30.07, "learning_rate": 0.00015481481481481482, "loss": 0.6142, "step": 1368 }, { "epoch": 30.86, "learning_rate": 0.00015303703703703706, "loss": 0.5865, "step": 1404 }, { "epoch": 31.65, "learning_rate": 0.00015125925925925927, "loss": 0.5649, "step": 1440 }, { "epoch": 32.44, "learning_rate": 0.00014948148148148148, "loss": 0.5511, "step": 1476 }, { "epoch": 33.23, "learning_rate": 0.0001477037037037037, "loss": 0.5329, "step": 1512 }, { "epoch": 34.02, "learning_rate": 0.00014592592592592592, "loss": 0.5265, "step": 1548 }, { "epoch": 34.81, "learning_rate": 0.00014414814814814816, "loss": 0.4931, "step": 1584 }, { "epoch": 35.6, "learning_rate": 0.00014237037037037037, "loss": 0.4904, "step": 1620 }, { "epoch": 36.4, "learning_rate": 0.0001405925925925926, "loss": 0.466, "step": 1656 }, { "epoch": 37.19, "learning_rate": 0.0001388148148148148, "loss": 0.4585, "step": 1692 }, { "epoch": 37.98, "learning_rate": 0.00013703703703703705, "loss": 0.4545, "step": 1728 }, { "epoch": 38.77, "learning_rate": 0.00013525925925925926, "loss": 0.4244, "step": 1764 }, { "epoch": 39.56, "learning_rate": 0.0001334814814814815, "loss": 0.4143, "step": 1800 }, { "epoch": 40.35, "learning_rate": 0.0001317037037037037, "loss": 0.4118, "step": 1836 }, { "epoch": 41.14, "learning_rate": 0.00012992592592592594, "loss": 0.3963, "step": 1872 }, { "epoch": 41.93, "learning_rate": 0.00012814814814814815, "loss": 0.3901, "step": 1908 }, { "epoch": 42.73, "learning_rate": 0.00012637037037037038, "loss": 0.3697, "step": 1944 }, { "epoch": 43.52, "learning_rate": 0.0001245925925925926, "loss": 0.3595, "step": 1980 }, { "epoch": 44.31, "learning_rate": 0.00012281481481481483, "loss": 0.3609, "step": 2016 }, { "epoch": 45.1, "learning_rate": 0.00012103703703703704, "loss": 0.3457, "step": 2052 }, { "epoch": 45.89, "learning_rate": 0.00011925925925925927, "loss": 0.3342, "step": 2088 }, { "epoch": 46.68, "learning_rate": 0.00011748148148148148, "loss": 0.3213, "step": 2124 }, { "epoch": 47.47, "learning_rate": 0.00011570370370370372, "loss": 0.3183, "step": 2160 }, { "epoch": 48.26, "learning_rate": 0.00011392592592592593, "loss": 0.3067, "step": 2196 }, { "epoch": 49.05, "learning_rate": 0.00011214814814814815, "loss": 0.3062, "step": 2232 }, { "epoch": 49.85, "learning_rate": 0.00011037037037037037, "loss": 0.291, "step": 2268 }, { "epoch": 50.64, "learning_rate": 0.0001085925925925926, "loss": 0.2837, "step": 2304 }, { "epoch": 51.43, "learning_rate": 0.0001068148148148148, "loss": 0.2768, "step": 2340 }, { "epoch": 52.22, "learning_rate": 0.00010503703703703704, "loss": 0.2733, "step": 2376 }, { "epoch": 53.01, "learning_rate": 0.00010325925925925925, "loss": 0.2622, "step": 2412 }, { "epoch": 53.8, "learning_rate": 0.00010148148148148149, "loss": 0.254, "step": 2448 }, { "epoch": 54.59, "learning_rate": 9.970370370370371e-05, "loss": 0.247, "step": 2484 }, { "epoch": 55.38, "learning_rate": 9.792592592592593e-05, "loss": 0.2415, "step": 2520 }, { "epoch": 56.18, "learning_rate": 9.614814814814816e-05, "loss": 0.2391, "step": 2556 }, { "epoch": 56.97, "learning_rate": 9.437037037037038e-05, "loss": 0.2318, "step": 2592 }, { "epoch": 57.76, "learning_rate": 9.25925925925926e-05, "loss": 0.221, "step": 2628 }, { "epoch": 58.55, "learning_rate": 9.081481481481482e-05, "loss": 0.2179, "step": 2664 }, { "epoch": 59.34, "learning_rate": 8.903703703703705e-05, "loss": 0.2145, "step": 2700 }, { "epoch": 60.13, "learning_rate": 8.725925925925927e-05, "loss": 0.209, "step": 2736 }, { "epoch": 60.92, "learning_rate": 8.548148148148148e-05, "loss": 0.205, "step": 2772 }, { "epoch": 61.71, "learning_rate": 8.37037037037037e-05, "loss": 0.197, "step": 2808 }, { "epoch": 62.51, "learning_rate": 8.192592592592592e-05, "loss": 0.1948, "step": 2844 }, { "epoch": 63.3, "learning_rate": 8.014814814814815e-05, "loss": 0.188, "step": 2880 }, { "epoch": 64.09, "learning_rate": 7.837037037037037e-05, "loss": 0.188, "step": 2916 }, { "epoch": 64.88, "learning_rate": 7.659259259259259e-05, "loss": 0.1785, "step": 2952 }, { "epoch": 65.67, "learning_rate": 7.481481481481481e-05, "loss": 0.1758, "step": 2988 }, { "epoch": 66.46, "learning_rate": 7.303703703703704e-05, "loss": 0.1703, "step": 3024 }, { "epoch": 67.25, "learning_rate": 7.125925925925926e-05, "loss": 0.1702, "step": 3060 } ], "logging_steps": 36, "max_steps": 4500, "num_train_epochs": 100, "save_steps": 500, "total_flos": 2.6941037663276237e+18, "trial_name": null, "trial_params": null }