{ "best_metric": 0.40839770436286926, "best_model_checkpoint": "/content/drive/MyDrive/colab/checkpoint-1700", "epoch": 1.6346153846153846, "global_step": 1700, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.1, "learning_rate": 4.7596153846153844e-05, "loss": 1.033, "step": 100 }, { "epoch": 0.1, "eval_loss": 0.6874200105667114, "eval_runtime": 57.2656, "eval_samples_per_second": 16.153, "eval_steps_per_second": 2.026, "step": 100 }, { "epoch": 0.19, "learning_rate": 4.519230769230769e-05, "loss": 0.6703, "step": 200 }, { "epoch": 0.19, "eval_loss": 0.5938639044761658, "eval_runtime": 57.2626, "eval_samples_per_second": 16.154, "eval_steps_per_second": 2.026, "step": 200 }, { "epoch": 0.29, "learning_rate": 4.278846153846154e-05, "loss": 0.6579, "step": 300 }, { "epoch": 0.29, "eval_loss": 0.49521076679229736, "eval_runtime": 57.2701, "eval_samples_per_second": 16.152, "eval_steps_per_second": 2.025, "step": 300 }, { "epoch": 0.38, "learning_rate": 4.038461538461539e-05, "loss": 0.5068, "step": 400 }, { "epoch": 0.38, "eval_loss": 0.5416558384895325, "eval_runtime": 57.2833, "eval_samples_per_second": 16.148, "eval_steps_per_second": 2.025, "step": 400 }, { "epoch": 0.48, "learning_rate": 3.798076923076923e-05, "loss": 0.5439, "step": 500 }, { "epoch": 0.48, "eval_loss": 0.5001481175422668, "eval_runtime": 57.2802, "eval_samples_per_second": 16.149, "eval_steps_per_second": 2.025, "step": 500 }, { "epoch": 0.58, "learning_rate": 3.557692307692308e-05, "loss": 0.5429, "step": 600 }, { "epoch": 0.58, "eval_loss": 0.47204354405403137, "eval_runtime": 57.2593, "eval_samples_per_second": 16.155, "eval_steps_per_second": 2.026, "step": 600 }, { "epoch": 0.67, "learning_rate": 3.3173076923076926e-05, "loss": 0.4817, "step": 700 }, { "epoch": 0.67, "eval_loss": 0.5970540642738342, "eval_runtime": 57.2593, "eval_samples_per_second": 16.155, "eval_steps_per_second": 2.026, "step": 700 }, { "epoch": 0.77, "learning_rate": 3.0769230769230774e-05, "loss": 0.5023, "step": 800 }, { "epoch": 0.77, "eval_loss": 0.44290876388549805, "eval_runtime": 57.2393, "eval_samples_per_second": 16.16, "eval_steps_per_second": 2.027, "step": 800 }, { "epoch": 0.87, "learning_rate": 2.8365384615384616e-05, "loss": 0.4756, "step": 900 }, { "epoch": 0.87, "eval_loss": 0.49230891466140747, "eval_runtime": 57.0994, "eval_samples_per_second": 16.2, "eval_steps_per_second": 2.032, "step": 900 }, { "epoch": 0.96, "learning_rate": 2.5961538461538464e-05, "loss": 0.4772, "step": 1000 }, { "epoch": 0.96, "eval_loss": 0.4519612491130829, "eval_runtime": 57.1095, "eval_samples_per_second": 16.197, "eval_steps_per_second": 2.031, "step": 1000 }, { "epoch": 1.06, "learning_rate": 2.355769230769231e-05, "loss": 0.336, "step": 1100 }, { "epoch": 1.06, "eval_loss": 0.41091373562812805, "eval_runtime": 57.1093, "eval_samples_per_second": 16.197, "eval_steps_per_second": 2.031, "step": 1100 }, { "epoch": 1.15, "learning_rate": 2.1153846153846154e-05, "loss": 0.2225, "step": 1200 }, { "epoch": 1.15, "eval_loss": 0.4540548324584961, "eval_runtime": 57.116, "eval_samples_per_second": 16.195, "eval_steps_per_second": 2.031, "step": 1200 }, { "epoch": 1.25, "learning_rate": 1.8750000000000002e-05, "loss": 0.2296, "step": 1300 }, { "epoch": 1.25, "eval_loss": 0.4974042475223541, "eval_runtime": 57.1073, "eval_samples_per_second": 16.198, "eval_steps_per_second": 2.031, "step": 1300 }, { "epoch": 1.35, "learning_rate": 1.6346153846153847e-05, "loss": 0.2511, "step": 1400 }, { "epoch": 1.35, "eval_loss": 0.48081809282302856, "eval_runtime": 57.1059, "eval_samples_per_second": 16.198, "eval_steps_per_second": 2.031, "step": 1400 }, { "epoch": 1.44, "learning_rate": 1.3942307692307693e-05, "loss": 0.3349, "step": 1500 }, { "epoch": 1.44, "eval_loss": 0.4536380171775818, "eval_runtime": 57.096, "eval_samples_per_second": 16.201, "eval_steps_per_second": 2.032, "step": 1500 }, { "epoch": 1.54, "learning_rate": 1.153846153846154e-05, "loss": 0.2529, "step": 1600 }, { "epoch": 1.54, "eval_loss": 0.4738934636116028, "eval_runtime": 57.1337, "eval_samples_per_second": 16.19, "eval_steps_per_second": 2.03, "step": 1600 }, { "epoch": 1.63, "learning_rate": 9.134615384615384e-06, "loss": 0.2064, "step": 1700 }, { "epoch": 1.63, "eval_loss": 0.40839770436286926, "eval_runtime": 57.1173, "eval_samples_per_second": 16.195, "eval_steps_per_second": 2.031, "step": 1700 } ], "max_steps": 2080, "num_train_epochs": 2, "total_flos": 3578535250329600.0, "trial_name": null, "trial_params": null }