{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.79150915963943, "global_step": 4800, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 6.122448979591835e-06, "loss": 1.1171, "step": 1 }, { "epoch": 0.06, "learning_rate": 0.00029680384374347184, "loss": 1.0109, "step": 100 }, { "epoch": 0.12, "learning_rate": 0.000290536870691456, "loss": 0.9896, "step": 200 }, { "epoch": 0.17, "learning_rate": 0.0002842698976394401, "loss": 0.9585, "step": 300 }, { "epoch": 0.23, "learning_rate": 0.0002780029245874242, "loss": 0.9377, "step": 400 }, { "epoch": 0.29, "learning_rate": 0.00027173595153540835, "loss": 0.9123, "step": 500 }, { "epoch": 0.35, "learning_rate": 0.0002654689784833925, "loss": 0.9131, "step": 600 }, { "epoch": 0.41, "learning_rate": 0.00025920200543137664, "loss": 0.9059, "step": 700 }, { "epoch": 0.47, "learning_rate": 0.0002529350323793608, "loss": 0.9045, "step": 800 }, { "epoch": 0.52, "learning_rate": 0.00024666805932734487, "loss": 0.8838, "step": 900 }, { "epoch": 0.58, "learning_rate": 0.000240401086275329, "loss": 0.8889, "step": 1000 }, { "epoch": 0.64, "learning_rate": 0.00023413411322331312, "loss": 0.8715, "step": 1100 }, { "epoch": 0.7, "learning_rate": 0.00022786714017129724, "loss": 0.8569, "step": 1200 }, { "epoch": 0.76, "learning_rate": 0.00022160016711928138, "loss": 0.8623, "step": 1300 }, { "epoch": 0.81, "learning_rate": 0.0002153331940672655, "loss": 0.8612, "step": 1400 }, { "epoch": 0.87, "learning_rate": 0.0002090662210152496, "loss": 0.8409, "step": 1500 }, { "epoch": 0.93, "learning_rate": 0.00020279924796323375, "loss": 0.846, "step": 1600 }, { "epoch": 0.99, "learning_rate": 0.00019653227491121786, "loss": 0.8467, "step": 1700 }, { "epoch": 1.05, "learning_rate": 0.00019026530185920198, "loss": 0.7872, "step": 1800 }, { "epoch": 1.1, "learning_rate": 0.0001839983288071861, "loss": 0.7711, "step": 1900 }, { "epoch": 1.16, "learning_rate": 0.00017773135575517023, "loss": 0.7451, "step": 2000 }, { "epoch": 1.22, "learning_rate": 0.00017146438270315435, "loss": 0.754, "step": 2100 }, { "epoch": 1.28, "learning_rate": 0.0001651974096511385, "loss": 0.7648, "step": 2200 }, { "epoch": 1.34, "learning_rate": 0.00015893043659912263, "loss": 0.7585, "step": 2300 }, { "epoch": 1.4, "learning_rate": 0.00015266346354710674, "loss": 0.7654, "step": 2400 }, { "epoch": 1.45, "learning_rate": 0.00014639649049509086, "loss": 0.7578, "step": 2500 }, { "epoch": 1.51, "learning_rate": 0.000140129517443075, "loss": 0.7324, "step": 2600 }, { "epoch": 1.57, "learning_rate": 0.00013386254439105911, "loss": 0.7439, "step": 2700 }, { "epoch": 1.63, "learning_rate": 0.00012759557133904323, "loss": 0.7515, "step": 2800 }, { "epoch": 1.69, "learning_rate": 0.00012132859828702736, "loss": 0.7484, "step": 2900 }, { "epoch": 1.74, "learning_rate": 0.00011506162523501148, "loss": 0.7466, "step": 3000 }, { "epoch": 1.8, "learning_rate": 0.0001087946521829956, "loss": 0.7242, "step": 3100 }, { "epoch": 1.86, "learning_rate": 0.00010252767913097973, "loss": 0.7357, "step": 3200 }, { "epoch": 1.92, "learning_rate": 9.626070607896384e-05, "loss": 0.7283, "step": 3300 }, { "epoch": 1.98, "learning_rate": 8.999373302694798e-05, "loss": 0.7375, "step": 3400 }, { "epoch": 2.04, "learning_rate": 8.372675997493211e-05, "loss": 0.6901, "step": 3500 }, { "epoch": 2.09, "learning_rate": 7.745978692291623e-05, "loss": 0.6553, "step": 3600 }, { "epoch": 2.15, "learning_rate": 7.119281387090035e-05, "loss": 0.6572, "step": 3700 }, { "epoch": 2.21, "learning_rate": 6.492584081888447e-05, "loss": 0.6609, "step": 3800 }, { "epoch": 2.27, "learning_rate": 5.8658867766868595e-05, "loss": 0.6704, "step": 3900 }, { "epoch": 2.33, "learning_rate": 5.239189471485272e-05, "loss": 0.6513, "step": 4000 }, { "epoch": 2.38, "learning_rate": 4.6124921662836844e-05, "loss": 0.6589, "step": 4100 }, { "epoch": 2.44, "learning_rate": 3.985794861082097e-05, "loss": 0.6755, "step": 4200 }, { "epoch": 2.5, "learning_rate": 3.359097555880509e-05, "loss": 0.6519, "step": 4300 }, { "epoch": 2.56, "learning_rate": 2.7324002506789218e-05, "loss": 0.6512, "step": 4400 }, { "epoch": 2.62, "learning_rate": 2.1057029454773346e-05, "loss": 0.6537, "step": 4500 }, { "epoch": 2.68, "learning_rate": 1.4790056402757467e-05, "loss": 0.6436, "step": 4600 }, { "epoch": 2.73, "learning_rate": 8.523083350741591e-06, "loss": 0.6488, "step": 4700 }, { "epoch": 2.79, "learning_rate": 2.2561102987257156e-06, "loss": 0.6393, "step": 4800 } ], "max_steps": 4836, "num_train_epochs": 3, "total_flos": 8.901906445628056e+18, "trial_name": null, "trial_params": null }