{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.412280701754386, "global_step": 2200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.11, "learning_rate": 2.973626373626374e-05, "loss": 1.622, "step": 100 }, { "epoch": 0.11, "eval_accuracy": 0.24010217113665389, "eval_loss": 1.6581733226776123, "eval_runtime": 381.1384, "eval_samples_per_second": 4.109, "step": 100 }, { "epoch": 0.22, "learning_rate": 2.9406593406593407e-05, "loss": 1.3721, "step": 200 }, { "epoch": 0.22, "eval_accuracy": 0.2835249042145594, "eval_loss": 1.9170576333999634, "eval_runtime": 378.5115, "eval_samples_per_second": 4.137, "step": 200 }, { "epoch": 0.33, "learning_rate": 2.907692307692308e-05, "loss": 1.1269, "step": 300 }, { "epoch": 0.33, "eval_accuracy": 0.36845466155810985, "eval_loss": 1.7032861709594727, "eval_runtime": 385.3173, "eval_samples_per_second": 4.064, "step": 300 }, { "epoch": 0.44, "learning_rate": 2.8747252747252748e-05, "loss": 0.9152, "step": 400 }, { "epoch": 0.44, "eval_accuracy": 0.388250319284802, "eval_loss": 1.7423878908157349, "eval_runtime": 388.0594, "eval_samples_per_second": 4.035, "step": 400 }, { "epoch": 0.55, "learning_rate": 2.8417582417582416e-05, "loss": 0.732, "step": 500 }, { "epoch": 0.55, "eval_accuracy": 0.4616858237547893, "eval_loss": 1.5253428220748901, "eval_runtime": 386.9842, "eval_samples_per_second": 4.047, "step": 500 }, { "epoch": 0.66, "learning_rate": 2.8087912087912088e-05, "loss": 0.684, "step": 600 }, { "epoch": 0.66, "eval_accuracy": 0.4367816091954023, "eval_loss": 1.6441736221313477, "eval_runtime": 388.1257, "eval_samples_per_second": 4.035, "step": 600 }, { "epoch": 0.77, "learning_rate": 2.775824175824176e-05, "loss": 0.5538, "step": 700 }, { "epoch": 0.77, "eval_accuracy": 0.5108556832694764, "eval_loss": 1.450364351272583, "eval_runtime": 389.4611, "eval_samples_per_second": 4.021, "step": 700 }, { "epoch": 0.88, "learning_rate": 2.7428571428571428e-05, "loss": 0.5425, "step": 800 }, { "epoch": 0.88, "eval_accuracy": 0.49936143039591313, "eval_loss": 1.4108269214630127, "eval_runtime": 389.7612, "eval_samples_per_second": 4.018, "step": 800 }, { "epoch": 0.99, "learning_rate": 2.70989010989011e-05, "loss": 0.514, "step": 900 }, { "epoch": 0.99, "eval_accuracy": 0.5057471264367817, "eval_loss": 1.4206087589263916, "eval_runtime": 396.4756, "eval_samples_per_second": 3.95, "step": 900 }, { "epoch": 1.1, "learning_rate": 2.6769230769230772e-05, "loss": 0.3068, "step": 1000 }, { "epoch": 1.1, "eval_accuracy": 0.5159642401021711, "eval_loss": 1.6294729709625244, "eval_runtime": 394.6782, "eval_samples_per_second": 3.968, "step": 1000 }, { "epoch": 1.21, "learning_rate": 2.643956043956044e-05, "loss": 0.2865, "step": 1100 }, { "epoch": 1.21, "eval_accuracy": 0.5076628352490421, "eval_loss": 1.733625888824463, "eval_runtime": 393.6145, "eval_samples_per_second": 3.979, "step": 1100 }, { "epoch": 1.32, "learning_rate": 2.6109890109890112e-05, "loss": 0.301, "step": 1200 }, { "epoch": 1.32, "eval_accuracy": 0.6277139208173691, "eval_loss": 1.2089332342147827, "eval_runtime": 389.8374, "eval_samples_per_second": 4.017, "step": 1200 }, { "epoch": 1.43, "learning_rate": 2.578021978021978e-05, "loss": 0.2517, "step": 1300 }, { "epoch": 1.43, "eval_accuracy": 0.5983397190293742, "eval_loss": 1.3427765369415283, "eval_runtime": 388.9975, "eval_samples_per_second": 4.026, "step": 1300 }, { "epoch": 1.54, "learning_rate": 2.545054945054945e-05, "loss": 0.2819, "step": 1400 }, { "epoch": 1.54, "eval_accuracy": 0.5600255427841635, "eval_loss": 1.5083413124084473, "eval_runtime": 390.3746, "eval_samples_per_second": 4.012, "step": 1400 }, { "epoch": 1.64, "learning_rate": 2.512087912087912e-05, "loss": 0.2706, "step": 1500 }, { "epoch": 1.64, "eval_accuracy": 0.6047254150702427, "eval_loss": 1.5413047075271606, "eval_runtime": 389.1952, "eval_samples_per_second": 4.024, "step": 1500 }, { "epoch": 1.75, "learning_rate": 2.479120879120879e-05, "loss": 0.1991, "step": 1600 }, { "epoch": 1.75, "eval_accuracy": 0.598978288633461, "eval_loss": 1.5572402477264404, "eval_runtime": 389.1243, "eval_samples_per_second": 4.024, "step": 1600 }, { "epoch": 1.86, "learning_rate": 2.446153846153846e-05, "loss": 0.2726, "step": 1700 }, { "epoch": 1.86, "eval_accuracy": 0.6021711366538953, "eval_loss": 1.4013671875, "eval_runtime": 392.7301, "eval_samples_per_second": 3.987, "step": 1700 }, { "epoch": 1.97, "learning_rate": 2.4131868131868133e-05, "loss": 0.2015, "step": 1800 }, { "epoch": 1.97, "eval_accuracy": 0.5676883780332056, "eval_loss": 2.248255729675293, "eval_runtime": 389.7999, "eval_samples_per_second": 4.017, "step": 1800 }, { "epoch": 2.08, "learning_rate": 2.38021978021978e-05, "loss": 0.173, "step": 1900 }, { "epoch": 2.08, "eval_accuracy": 0.644316730523627, "eval_loss": 1.5393086671829224, "eval_runtime": 390.3149, "eval_samples_per_second": 4.012, "step": 1900 }, { "epoch": 2.19, "learning_rate": 2.3472527472527474e-05, "loss": 0.1177, "step": 2000 }, { "epoch": 2.19, "eval_accuracy": 0.6028097062579821, "eval_loss": 1.9018601179122925, "eval_runtime": 390.5708, "eval_samples_per_second": 4.01, "step": 2000 }, { "epoch": 2.3, "learning_rate": 2.3142857142857145e-05, "loss": 0.0907, "step": 2100 }, { "epoch": 2.3, "eval_accuracy": 0.6194125159642401, "eval_loss": 1.8655920028686523, "eval_runtime": 392.1244, "eval_samples_per_second": 3.994, "step": 2100 }, { "epoch": 2.41, "learning_rate": 2.2813186813186814e-05, "loss": 0.1878, "step": 2200 }, { "epoch": 2.41, "eval_accuracy": 0.6296296296296297, "eval_loss": 1.998379111289978, "eval_runtime": 390.0846, "eval_samples_per_second": 4.015, "step": 2200 } ], "max_steps": 9120, "num_train_epochs": 10, "total_flos": 2.137068831744e+19, "trial_name": null, "trial_params": null }