{ "best_metric": 0.7537589073181152, "best_model_checkpoint": "deberta_v3_finetuned_predicting_effective_arguments/checkpoint-2000", "epoch": 0.9667673716012085, "global_step": 2000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.05, "learning_rate": 2e-07, "loss": 1.0361, "step": 100 }, { "epoch": 0.1, "learning_rate": 3.98e-07, "loss": 0.9751, "step": 200 }, { "epoch": 0.15, "learning_rate": 5.979999999999999e-07, "loss": 0.9807, "step": 300 }, { "epoch": 0.19, "learning_rate": 7.94e-07, "loss": 0.9513, "step": 400 }, { "epoch": 0.24, "learning_rate": 9.94e-07, "loss": 0.9237, "step": 500 }, { "epoch": 0.24, "eval_loss": 0.9464540481567383, "eval_runtime": 37.5769, "eval_samples_per_second": 97.587, "eval_steps_per_second": 24.403, "step": 500 }, { "epoch": 0.29, "learning_rate": 9.955915919781764e-07, "loss": 0.9367, "step": 600 }, { "epoch": 0.34, "learning_rate": 9.81900265076038e-07, "loss": 0.8993, "step": 700 }, { "epoch": 0.39, "learning_rate": 9.594490241150311e-07, "loss": 0.8586, "step": 800 }, { "epoch": 0.44, "learning_rate": 9.282057505552949e-07, "loss": 0.854, "step": 900 }, { "epoch": 0.48, "learning_rate": 8.889381125453379e-07, "loss": 0.8642, "step": 1000 }, { "epoch": 0.48, "eval_loss": 0.8270628452301025, "eval_runtime": 28.2214, "eval_samples_per_second": 129.937, "eval_steps_per_second": 32.493, "step": 1000 }, { "epoch": 0.53, "learning_rate": 8.423819662432867e-07, "loss": 0.8314, "step": 1100 }, { "epoch": 0.58, "learning_rate": 7.894097508558568e-07, "loss": 0.8317, "step": 1200 }, { "epoch": 0.63, "learning_rate": 7.310141395581585e-07, "loss": 0.7939, "step": 1300 }, { "epoch": 0.68, "learning_rate": 6.682894372882701e-07, "loss": 0.803, "step": 1400 }, { "epoch": 0.73, "learning_rate": 6.024110740127264e-07, "loss": 0.8029, "step": 1500 }, { "epoch": 0.73, "eval_loss": 0.7635419368743896, "eval_runtime": 28.2809, "eval_samples_per_second": 129.663, "eval_steps_per_second": 32.425, "step": 1500 }, { "epoch": 0.77, "learning_rate": 5.346135777490083e-07, "loss": 0.7959, "step": 1600 }, { "epoch": 0.82, "learning_rate": 4.6616744011972247e-07, "loss": 0.7708, "step": 1700 }, { "epoch": 0.87, "learning_rate": 3.9835530796656867e-07, "loss": 0.7636, "step": 1800 }, { "epoch": 0.92, "learning_rate": 3.3244794718149894e-07, "loss": 0.7864, "step": 1900 }, { "epoch": 0.97, "learning_rate": 2.696804291810131e-07, "loss": 0.8005, "step": 2000 }, { "epoch": 0.97, "eval_loss": 0.7537589073181152, "eval_runtime": 28.4034, "eval_samples_per_second": 129.104, "eval_steps_per_second": 32.285, "step": 2000 } ], "max_steps": 16544, "num_train_epochs": 8, "total_flos": 1828884830909760.0, "trial_name": null, "trial_params": null }