{ "best_metric": 0.6859038472175598, "best_model_checkpoint": "deberta_v3_finetuned_predicting_effective_arguments/checkpoint-2000", "epoch": 4.835287009063444, "global_step": 10000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.24, "learning_rate": 9.518375241779499e-06, "loss": 0.8315, "step": 500 }, { "epoch": 0.24, "eval_loss": 0.7444673180580139, "eval_runtime": 29.7252, "eval_samples_per_second": 123.33, "eval_steps_per_second": 30.849, "step": 500 }, { "epoch": 0.48, "learning_rate": 9.034816247582205e-06, "loss": 0.7232, "step": 1000 }, { "epoch": 0.48, "eval_loss": 0.7441162467002869, "eval_runtime": 29.4226, "eval_samples_per_second": 124.598, "eval_steps_per_second": 31.167, "step": 1000 }, { "epoch": 0.73, "learning_rate": 8.552224371373308e-06, "loss": 0.7269, "step": 1500 }, { "epoch": 0.73, "eval_loss": 0.7453668117523193, "eval_runtime": 29.5754, "eval_samples_per_second": 123.955, "eval_steps_per_second": 31.006, "step": 1500 }, { "epoch": 0.97, "learning_rate": 8.068665377176016e-06, "loss": 0.7185, "step": 2000 }, { "epoch": 0.97, "eval_loss": 0.6859038472175598, "eval_runtime": 29.569, "eval_samples_per_second": 123.981, "eval_steps_per_second": 31.012, "step": 2000 }, { "epoch": 1.21, "learning_rate": 7.585106382978724e-06, "loss": 0.6611, "step": 2500 }, { "epoch": 1.21, "eval_loss": 0.6883463263511658, "eval_runtime": 29.5377, "eval_samples_per_second": 124.113, "eval_steps_per_second": 31.045, "step": 2500 }, { "epoch": 1.45, "learning_rate": 7.101547388781432e-06, "loss": 0.6516, "step": 3000 }, { "epoch": 1.45, "eval_loss": 0.7088809013366699, "eval_runtime": 29.1585, "eval_samples_per_second": 125.727, "eval_steps_per_second": 31.449, "step": 3000 }, { "epoch": 1.69, "learning_rate": 6.617988394584139e-06, "loss": 0.6386, "step": 3500 }, { "epoch": 1.69, "eval_loss": 0.6939310431480408, "eval_runtime": 29.2346, "eval_samples_per_second": 125.399, "eval_steps_per_second": 31.367, "step": 3500 }, { "epoch": 1.93, "learning_rate": 6.134429400386848e-06, "loss": 0.6435, "step": 4000 }, { "epoch": 1.93, "eval_loss": 0.6942200064659119, "eval_runtime": 30.0324, "eval_samples_per_second": 122.068, "eval_steps_per_second": 30.534, "step": 4000 }, { "epoch": 2.18, "learning_rate": 5.650870406189556e-06, "loss": 0.5823, "step": 4500 }, { "epoch": 2.18, "eval_loss": 0.771172821521759, "eval_runtime": 29.4927, "eval_samples_per_second": 124.302, "eval_steps_per_second": 31.092, "step": 4500 }, { "epoch": 2.42, "learning_rate": 5.167311411992263e-06, "loss": 0.5652, "step": 5000 }, { "epoch": 2.42, "eval_loss": 0.7315438985824585, "eval_runtime": 28.9938, "eval_samples_per_second": 126.441, "eval_steps_per_second": 31.627, "step": 5000 }, { "epoch": 2.66, "learning_rate": 4.683752417794971e-06, "loss": 0.5701, "step": 5500 }, { "epoch": 2.66, "eval_loss": 0.7429590225219727, "eval_runtime": 29.2482, "eval_samples_per_second": 125.341, "eval_steps_per_second": 31.352, "step": 5500 }, { "epoch": 2.9, "learning_rate": 4.20019342359768e-06, "loss": 0.5583, "step": 6000 }, { "epoch": 2.9, "eval_loss": 0.74286949634552, "eval_runtime": 28.9549, "eval_samples_per_second": 126.611, "eval_steps_per_second": 31.67, "step": 6000 }, { "epoch": 3.14, "learning_rate": 3.7176015473887818e-06, "loss": 0.5145, "step": 6500 }, { "epoch": 3.14, "eval_loss": 0.8086823225021362, "eval_runtime": 29.0547, "eval_samples_per_second": 126.176, "eval_steps_per_second": 31.561, "step": 6500 }, { "epoch": 3.38, "learning_rate": 3.23404255319149e-06, "loss": 0.4795, "step": 7000 }, { "epoch": 3.38, "eval_loss": 0.8026116490364075, "eval_runtime": 28.8514, "eval_samples_per_second": 127.065, "eval_steps_per_second": 31.784, "step": 7000 }, { "epoch": 3.63, "learning_rate": 2.7504835589941973e-06, "loss": 0.4784, "step": 7500 }, { "epoch": 3.63, "eval_loss": 0.7970021963119507, "eval_runtime": 28.8445, "eval_samples_per_second": 127.095, "eval_steps_per_second": 31.791, "step": 7500 }, { "epoch": 3.87, "learning_rate": 2.2669245647969055e-06, "loss": 0.4793, "step": 8000 }, { "epoch": 3.87, "eval_loss": 0.7971013784408569, "eval_runtime": 29.079, "eval_samples_per_second": 126.07, "eval_steps_per_second": 31.535, "step": 8000 }, { "epoch": 4.11, "learning_rate": 1.784332688588008e-06, "loss": 0.4544, "step": 8500 }, { "epoch": 4.11, "eval_loss": 0.8546438813209534, "eval_runtime": 29.1238, "eval_samples_per_second": 125.877, "eval_steps_per_second": 31.486, "step": 8500 }, { "epoch": 4.35, "learning_rate": 1.3007736943907157e-06, "loss": 0.4115, "step": 9000 }, { "epoch": 4.35, "eval_loss": 0.8430052995681763, "eval_runtime": 29.3455, "eval_samples_per_second": 124.926, "eval_steps_per_second": 31.248, "step": 9000 }, { "epoch": 4.59, "learning_rate": 8.172147001934237e-07, "loss": 0.4192, "step": 9500 }, { "epoch": 4.59, "eval_loss": 0.8553691506385803, "eval_runtime": 30.0888, "eval_samples_per_second": 121.839, "eval_steps_per_second": 30.476, "step": 9500 }, { "epoch": 4.84, "learning_rate": 3.3365570599613157e-07, "loss": 0.4101, "step": 10000 }, { "epoch": 4.84, "eval_loss": 0.863072395324707, "eval_runtime": 29.1167, "eval_samples_per_second": 125.907, "eval_steps_per_second": 31.494, "step": 10000 } ], "max_steps": 10340, "num_train_epochs": 5, "total_flos": 9148295382909930.0, "trial_name": null, "trial_params": null }