{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "global_step": 19, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.05, "learning_rate": 3e-05, "loss": 2.6941, "step": 1 }, { "epoch": 0.05, "eval_accuracy": 0.062219502243982046, "eval_loss": 2.654296875, "eval_runtime": 2.3869, "eval_samples_per_second": 23.881, "eval_steps_per_second": 1.676, "step": 1 }, { "epoch": 0.11, "learning_rate": 3e-05, "loss": 2.6914, "step": 2 }, { "epoch": 0.11, "eval_accuracy": 0.062219502243982046, "eval_loss": 2.654296875, "eval_runtime": 3.2001, "eval_samples_per_second": 17.812, "eval_steps_per_second": 1.25, "step": 2 }, { "epoch": 0.16, "learning_rate": 2.9795419551040836e-05, "loss": 2.6003, "step": 3 }, { "epoch": 0.16, "eval_accuracy": 0.06265016546534294, "eval_loss": 2.6015625, "eval_runtime": 3.1764, "eval_samples_per_second": 17.945, "eval_steps_per_second": 1.259, "step": 3 }, { "epoch": 0.21, "learning_rate": 2.9187258625509518e-05, "loss": 2.5603, "step": 4 }, { "epoch": 0.21, "eval_accuracy": 0.06260483249467337, "eval_loss": 2.5703125, "eval_runtime": 2.9857, "eval_samples_per_second": 19.091, "eval_steps_per_second": 1.34, "step": 4 }, { "epoch": 0.26, "learning_rate": 2.8192106268097336e-05, "loss": 2.606, "step": 5 }, { "epoch": 0.26, "eval_accuracy": 0.06292216328936036, "eval_loss": 2.55078125, "eval_runtime": 2.5694, "eval_samples_per_second": 22.184, "eval_steps_per_second": 1.557, "step": 5 }, { "epoch": 0.32, "learning_rate": 2.6837107640945904e-05, "loss": 2.5439, "step": 6 }, { "epoch": 0.32, "eval_accuracy": 0.06292216328936036, "eval_loss": 2.544921875, "eval_runtime": 3.1779, "eval_samples_per_second": 17.937, "eval_steps_per_second": 1.259, "step": 6 }, { "epoch": 0.37, "learning_rate": 2.5159223574386117e-05, "loss": 2.4449, "step": 7 }, { "epoch": 0.37, "eval_accuracy": 0.06287683031869079, "eval_loss": 2.546875, "eval_runtime": 2.3628, "eval_samples_per_second": 24.124, "eval_steps_per_second": 1.693, "step": 7 }, { "epoch": 0.42, "learning_rate": 2.320422237183641e-05, "loss": 2.5422, "step": 8 }, { "epoch": 0.42, "eval_accuracy": 0.0629901627453647, "eval_loss": 2.546875, "eval_runtime": 3.1712, "eval_samples_per_second": 17.974, "eval_steps_per_second": 1.261, "step": 8 }, { "epoch": 0.47, "learning_rate": 2.1025431369794546e-05, "loss": 2.6101, "step": 9 }, { "epoch": 0.47, "eval_accuracy": 0.06319416111337776, "eval_loss": 2.541015625, "eval_runtime": 2.3922, "eval_samples_per_second": 23.827, "eval_steps_per_second": 1.672, "step": 9 }, { "epoch": 0.53, "learning_rate": 1.8682282307111988e-05, "loss": 2.4482, "step": 10 }, { "epoch": 0.53, "eval_accuracy": 0.0629901627453647, "eval_loss": 2.53515625, "eval_runtime": 2.3705, "eval_samples_per_second": 24.045, "eval_steps_per_second": 1.687, "step": 10 }, { "epoch": 0.58, "learning_rate": 1.623869018208499e-05, "loss": 2.501, "step": 11 }, { "epoch": 0.58, "eval_accuracy": 0.0631261616573734, "eval_loss": 2.529296875, "eval_runtime": 2.7727, "eval_samples_per_second": 20.558, "eval_steps_per_second": 1.443, "step": 11 }, { "epoch": 0.63, "learning_rate": 1.3761309817915017e-05, "loss": 2.5967, "step": 12 }, { "epoch": 0.63, "eval_accuracy": 0.06337549299605603, "eval_loss": 2.521484375, "eval_runtime": 2.168, "eval_samples_per_second": 26.292, "eval_steps_per_second": 1.845, "step": 12 }, { "epoch": 0.68, "learning_rate": 1.1317717692888014e-05, "loss": 2.4998, "step": 13 }, { "epoch": 0.68, "eval_accuracy": 0.06346615893739517, "eval_loss": 2.513671875, "eval_runtime": 3.1858, "eval_samples_per_second": 17.892, "eval_steps_per_second": 1.256, "step": 13 }, { "epoch": 0.74, "learning_rate": 8.974568630205462e-06, "loss": 2.5957, "step": 14 }, { "epoch": 0.74, "eval_accuracy": 0.06364749082007344, "eval_loss": 2.509765625, "eval_runtime": 2.1678, "eval_samples_per_second": 26.294, "eval_steps_per_second": 1.845, "step": 14 }, { "epoch": 0.79, "learning_rate": 6.795777628163599e-06, "loss": 2.5967, "step": 15 }, { "epoch": 0.79, "eval_accuracy": 0.06387415567342128, "eval_loss": 2.50390625, "eval_runtime": 3.188, "eval_samples_per_second": 17.879, "eval_steps_per_second": 1.255, "step": 15 }, { "epoch": 0.84, "learning_rate": 4.840776425613887e-06, "loss": 2.5022, "step": 16 }, { "epoch": 0.84, "eval_accuracy": 0.06373815676141258, "eval_loss": 2.5, "eval_runtime": 3.178, "eval_samples_per_second": 17.936, "eval_steps_per_second": 1.259, "step": 16 }, { "epoch": 0.89, "learning_rate": 3.162892359054098e-06, "loss": 2.4314, "step": 17 }, { "epoch": 0.89, "eval_accuracy": 0.06371549027607779, "eval_loss": 2.498046875, "eval_runtime": 2.968, "eval_samples_per_second": 19.205, "eval_steps_per_second": 1.348, "step": 17 }, { "epoch": 0.95, "learning_rate": 1.8078937319026655e-06, "loss": 2.6279, "step": 18 }, { "epoch": 0.95, "eval_accuracy": 0.06362482433473865, "eval_loss": 2.49609375, "eval_runtime": 3.1736, "eval_samples_per_second": 17.961, "eval_steps_per_second": 1.26, "step": 18 }, { "epoch": 1.0, "learning_rate": 8.127413744904805e-07, "loss": 2.571, "step": 19 }, { "epoch": 1.0, "eval_accuracy": 0.06357949136406908, "eval_loss": 2.49609375, "eval_runtime": 2.9674, "eval_samples_per_second": 19.208, "eval_steps_per_second": 1.348, "step": 19 }, { "epoch": 1.0, "step": 19, "total_flos": 1001216802816.0, "train_loss": 2.5612600226151314, "train_runtime": 437.2347, "train_samples_per_second": 0.693, "train_steps_per_second": 0.043 } ], "max_steps": 19, "num_train_epochs": 1, "total_flos": 1001216802816.0, "trial_name": null, "trial_params": null }