{ "best_metric": null, "best_model_checkpoint": null, "epoch": 5.0, "global_step": 10075, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.1, "learning_rate": 9.8014888337469e-06, "loss": 0.2058, "step": 200 }, { "epoch": 0.2, "learning_rate": 9.602977667493797e-06, "loss": 0.1586, "step": 400 }, { "epoch": 0.3, "learning_rate": 9.404466501240696e-06, "loss": 0.1283, "step": 600 }, { "epoch": 0.4, "learning_rate": 9.205955334987593e-06, "loss": 0.1288, "step": 800 }, { "epoch": 0.5, "learning_rate": 9.007444168734492e-06, "loss": 0.1259, "step": 1000 }, { "epoch": 0.6, "learning_rate": 8.80893300248139e-06, "loss": 0.1196, "step": 1200 }, { "epoch": 0.69, "learning_rate": 8.610421836228289e-06, "loss": 0.125, "step": 1400 }, { "epoch": 0.79, "learning_rate": 8.411910669975186e-06, "loss": 0.1152, "step": 1600 }, { "epoch": 0.89, "learning_rate": 8.213399503722085e-06, "loss": 0.1141, "step": 1800 }, { "epoch": 0.99, "learning_rate": 8.014888337468984e-06, "loss": 0.1088, "step": 2000 }, { "epoch": 1.09, "learning_rate": 7.816377171215881e-06, "loss": 0.0938, "step": 2200 }, { "epoch": 1.19, "learning_rate": 7.6178660049627794e-06, "loss": 0.0882, "step": 2400 }, { "epoch": 1.29, "learning_rate": 7.4193548387096784e-06, "loss": 0.0908, "step": 2600 }, { "epoch": 1.39, "learning_rate": 7.220843672456577e-06, "loss": 0.0982, "step": 2800 }, { "epoch": 1.49, "learning_rate": 7.022332506203474e-06, "loss": 0.0938, "step": 3000 }, { "epoch": 1.59, "learning_rate": 6.823821339950373e-06, "loss": 0.0997, "step": 3200 }, { "epoch": 1.69, "learning_rate": 6.625310173697271e-06, "loss": 0.0903, "step": 3400 }, { "epoch": 1.79, "learning_rate": 6.426799007444169e-06, "loss": 0.1015, "step": 3600 }, { "epoch": 1.89, "learning_rate": 6.228287841191067e-06, "loss": 0.0719, "step": 3800 }, { "epoch": 1.99, "learning_rate": 6.029776674937966e-06, "loss": 0.0874, "step": 4000 }, { "epoch": 2.08, "learning_rate": 5.831265508684864e-06, "loss": 0.0783, "step": 4200 }, { "epoch": 2.18, "learning_rate": 5.632754342431762e-06, "loss": 0.0737, "step": 4400 }, { "epoch": 2.28, "learning_rate": 5.434243176178661e-06, "loss": 0.0657, "step": 4600 }, { "epoch": 2.38, "learning_rate": 5.235732009925558e-06, "loss": 0.0754, "step": 4800 }, { "epoch": 2.48, "learning_rate": 5.037220843672457e-06, "loss": 0.0734, "step": 5000 }, { "epoch": 2.58, "learning_rate": 4.838709677419355e-06, "loss": 0.081, "step": 5200 }, { "epoch": 2.68, "learning_rate": 4.640198511166253e-06, "loss": 0.0701, "step": 5400 }, { "epoch": 2.78, "learning_rate": 4.4416873449131515e-06, "loss": 0.0781, "step": 5600 }, { "epoch": 2.88, "learning_rate": 4.24317617866005e-06, "loss": 0.074, "step": 5800 }, { "epoch": 2.98, "learning_rate": 4.044665012406948e-06, "loss": 0.0722, "step": 6000 }, { "epoch": 3.08, "learning_rate": 3.846153846153847e-06, "loss": 0.0551, "step": 6200 }, { "epoch": 3.18, "learning_rate": 3.6476426799007445e-06, "loss": 0.0526, "step": 6400 }, { "epoch": 3.28, "learning_rate": 3.4491315136476427e-06, "loss": 0.0645, "step": 6600 }, { "epoch": 3.37, "learning_rate": 3.2506203473945412e-06, "loss": 0.064, "step": 6800 }, { "epoch": 3.47, "learning_rate": 3.0521091811414394e-06, "loss": 0.0569, "step": 7000 }, { "epoch": 3.57, "learning_rate": 2.8535980148883375e-06, "loss": 0.066, "step": 7200 }, { "epoch": 3.67, "learning_rate": 2.655086848635236e-06, "loss": 0.0616, "step": 7400 }, { "epoch": 3.77, "learning_rate": 2.4565756823821343e-06, "loss": 0.0622, "step": 7600 }, { "epoch": 3.87, "learning_rate": 2.2580645161290324e-06, "loss": 0.0561, "step": 7800 }, { "epoch": 3.97, "learning_rate": 2.0595533498759305e-06, "loss": 0.0544, "step": 8000 }, { "epoch": 4.07, "learning_rate": 1.861042183622829e-06, "loss": 0.0541, "step": 8200 }, { "epoch": 4.17, "learning_rate": 1.662531017369727e-06, "loss": 0.0532, "step": 8400 }, { "epoch": 4.27, "learning_rate": 1.4640198511166254e-06, "loss": 0.046, "step": 8600 }, { "epoch": 4.37, "learning_rate": 1.2655086848635238e-06, "loss": 0.0522, "step": 8800 }, { "epoch": 4.47, "learning_rate": 1.066997518610422e-06, "loss": 0.0575, "step": 9000 }, { "epoch": 4.57, "learning_rate": 8.684863523573202e-07, "loss": 0.0534, "step": 9200 }, { "epoch": 4.67, "learning_rate": 6.699751861042183e-07, "loss": 0.0466, "step": 9400 }, { "epoch": 4.76, "learning_rate": 4.714640198511167e-07, "loss": 0.0485, "step": 9600 }, { "epoch": 4.86, "learning_rate": 2.7295285359801494e-07, "loss": 0.0624, "step": 9800 }, { "epoch": 4.96, "learning_rate": 7.444168734491315e-08, "loss": 0.0436, "step": 10000 }, { "epoch": 5.0, "step": 10075, "total_flos": 66843438723916800, "train_runtime": 8275.4395, "train_samples_per_second": 1.217 } ], "max_steps": 10075, "num_train_epochs": 5, "total_flos": 66843438723916800, "trial_name": null, "trial_params": null }