{ "best_metric": null, "best_model_checkpoint": null, "epoch": 199.0662251655629, "eval_steps": 500, "global_step": 2000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 2.07, "learning_rate": 9.933774834437087e-07, "loss": 2.1111, "step": 30 }, { "epoch": 5.07, "learning_rate": 1.9867549668874175e-06, "loss": 2.0911, "step": 60 }, { "epoch": 8.07, "learning_rate": 2.9801324503311258e-06, "loss": 2.0488, "step": 90 }, { "epoch": 11.07, "learning_rate": 3.973509933774835e-06, "loss": 1.9825, "step": 120 }, { "epoch": 14.07, "learning_rate": 4.966887417218543e-06, "loss": 1.8997, "step": 150 }, { "epoch": 17.07, "learning_rate": 5.9602649006622515e-06, "loss": 1.811, "step": 180 }, { "epoch": 20.07, "learning_rate": 6.95364238410596e-06, "loss": 1.7265, "step": 210 }, { "epoch": 23.07, "learning_rate": 7.94701986754967e-06, "loss": 1.6527, "step": 240 }, { "epoch": 26.07, "learning_rate": 8.940397350993377e-06, "loss": 1.5955, "step": 270 }, { "epoch": 29.07, "learning_rate": 9.933774834437086e-06, "loss": 1.5492, "step": 300 }, { "epoch": 32.07, "learning_rate": 1.0927152317880796e-05, "loss": 1.508, "step": 330 }, { "epoch": 35.07, "learning_rate": 1.1920529801324503e-05, "loss": 1.4678, "step": 360 }, { "epoch": 38.07, "learning_rate": 1.2913907284768212e-05, "loss": 1.4278, "step": 390 }, { "epoch": 41.07, "learning_rate": 1.390728476821192e-05, "loss": 1.3864, "step": 420 }, { "epoch": 44.07, "learning_rate": 1.490066225165563e-05, "loss": 1.3418, "step": 450 }, { "epoch": 47.07, "learning_rate": 1.589403973509934e-05, "loss": 1.2955, "step": 480 }, { "epoch": 50.07, "learning_rate": 1.688741721854305e-05, "loss": 1.2506, "step": 510 }, { "epoch": 53.07, "learning_rate": 1.7880794701986755e-05, "loss": 1.2051, "step": 540 }, { "epoch": 56.07, "learning_rate": 1.8874172185430464e-05, "loss": 1.1617, "step": 570 }, { "epoch": 59.07, "learning_rate": 1.9867549668874173e-05, "loss": 1.1198, "step": 600 }, { "epoch": 62.07, "learning_rate": 2.0860927152317882e-05, "loss": 1.0815, "step": 630 }, { "epoch": 65.07, "learning_rate": 2.185430463576159e-05, "loss": 1.0462, "step": 660 }, { "epoch": 68.07, "learning_rate": 2.28476821192053e-05, "loss": 1.0112, "step": 690 }, { "epoch": 71.07, "learning_rate": 2.3841059602649006e-05, "loss": 0.9762, "step": 720 }, { "epoch": 74.07, "learning_rate": 2.4834437086092715e-05, "loss": 0.941, "step": 750 }, { "epoch": 77.07, "learning_rate": 2.5827814569536424e-05, "loss": 0.9048, "step": 780 }, { "epoch": 80.07, "learning_rate": 2.6821192052980134e-05, "loss": 0.8728, "step": 810 }, { "epoch": 83.07, "learning_rate": 2.781456953642384e-05, "loss": 0.839, "step": 840 }, { "epoch": 86.07, "learning_rate": 2.880794701986755e-05, "loss": 0.8041, "step": 870 }, { "epoch": 89.07, "learning_rate": 2.980132450331126e-05, "loss": 0.7714, "step": 900 }, { "epoch": 92.07, "learning_rate": 3.079470198675497e-05, "loss": 0.7396, "step": 930 }, { "epoch": 95.07, "learning_rate": 3.178807947019868e-05, "loss": 0.7062, "step": 960 }, { "epoch": 98.07, "learning_rate": 3.278145695364239e-05, "loss": 0.6731, "step": 990 }, { "epoch": 101.07, "learning_rate": 3.37748344370861e-05, "loss": 0.6376, "step": 1020 }, { "epoch": 104.07, "learning_rate": 3.47682119205298e-05, "loss": 0.6083, "step": 1050 }, { "epoch": 107.07, "learning_rate": 3.576158940397351e-05, "loss": 0.5789, "step": 1080 }, { "epoch": 110.07, "learning_rate": 3.675496688741722e-05, "loss": 0.5487, "step": 1110 }, { "epoch": 113.07, "learning_rate": 3.774834437086093e-05, "loss": 0.5202, "step": 1140 }, { "epoch": 116.07, "learning_rate": 3.8741721854304637e-05, "loss": 0.4941, "step": 1170 }, { "epoch": 119.07, "learning_rate": 3.9735099337748346e-05, "loss": 0.4675, "step": 1200 }, { "epoch": 122.07, "learning_rate": 4.0728476821192055e-05, "loss": 0.4421, "step": 1230 }, { "epoch": 125.07, "learning_rate": 4.1721854304635764e-05, "loss": 0.4175, "step": 1260 }, { "epoch": 128.07, "learning_rate": 4.271523178807947e-05, "loss": 0.3933, "step": 1290 }, { "epoch": 131.07, "learning_rate": 4.370860927152318e-05, "loss": 0.3664, "step": 1320 }, { "epoch": 134.07, "learning_rate": 4.470198675496689e-05, "loss": 0.3446, "step": 1350 }, { "epoch": 137.07, "learning_rate": 4.56953642384106e-05, "loss": 0.32, "step": 1380 }, { "epoch": 140.07, "learning_rate": 4.668874172185431e-05, "loss": 0.305, "step": 1410 }, { "epoch": 143.07, "learning_rate": 4.768211920529801e-05, "loss": 0.2827, "step": 1440 }, { "epoch": 146.07, "learning_rate": 4.867549668874172e-05, "loss": 0.2646, "step": 1470 }, { "epoch": 149.07, "learning_rate": 4.966887417218543e-05, "loss": 0.2455, "step": 1500 }, { "epoch": 152.07, "learning_rate": 5.0662251655629146e-05, "loss": 0.2318, "step": 1530 }, { "epoch": 155.07, "learning_rate": 5.165562913907285e-05, "loss": 0.2141, "step": 1560 }, { "epoch": 158.07, "learning_rate": 5.264900662251656e-05, "loss": 0.1944, "step": 1590 }, { "epoch": 161.07, "learning_rate": 5.364238410596027e-05, "loss": 0.1807, "step": 1620 }, { "epoch": 164.07, "learning_rate": 5.4635761589403976e-05, "loss": 0.1731, "step": 1650 }, { "epoch": 167.07, "learning_rate": 5.562913907284768e-05, "loss": 0.1582, "step": 1680 }, { "epoch": 170.07, "learning_rate": 5.6622516556291394e-05, "loss": 0.1514, "step": 1710 }, { "epoch": 173.07, "learning_rate": 5.76158940397351e-05, "loss": 0.1379, "step": 1740 }, { "epoch": 176.07, "learning_rate": 5.860927152317881e-05, "loss": 0.1324, "step": 1770 }, { "epoch": 179.07, "learning_rate": 5.960264900662252e-05, "loss": 0.1194, "step": 1800 }, { "epoch": 182.07, "learning_rate": 6.0596026490066224e-05, "loss": 0.1099, "step": 1830 }, { "epoch": 185.07, "learning_rate": 6.158940397350994e-05, "loss": 0.1051, "step": 1860 }, { "epoch": 188.07, "learning_rate": 6.258278145695365e-05, "loss": 0.0955, "step": 1890 }, { "epoch": 191.07, "learning_rate": 6.357615894039736e-05, "loss": 0.094, "step": 1920 }, { "epoch": 194.07, "learning_rate": 6.456953642384105e-05, "loss": 0.0853, "step": 1950 }, { "epoch": 197.07, "learning_rate": 6.556291390728478e-05, "loss": 0.0786, "step": 1980 } ], "logging_steps": 30, "max_steps": 30200, "num_train_epochs": 200, "save_steps": 500, "total_flos": 3.2310451765248e+16, "trial_name": null, "trial_params": null }