|
{ |
|
"best_metric": 0.6859038472175598, |
|
"best_model_checkpoint": "deberta_v3_finetuned_predicting_effective_arguments/checkpoint-2000", |
|
"epoch": 4.835287009063444, |
|
"global_step": 10000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 9.518375241779499e-06, |
|
"loss": 0.8315, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"eval_loss": 0.7444673180580139, |
|
"eval_runtime": 29.7252, |
|
"eval_samples_per_second": 123.33, |
|
"eval_steps_per_second": 30.849, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 9.034816247582205e-06, |
|
"loss": 0.7232, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"eval_loss": 0.7441162467002869, |
|
"eval_runtime": 29.4226, |
|
"eval_samples_per_second": 124.598, |
|
"eval_steps_per_second": 31.167, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 8.552224371373308e-06, |
|
"loss": 0.7269, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"eval_loss": 0.7453668117523193, |
|
"eval_runtime": 29.5754, |
|
"eval_samples_per_second": 123.955, |
|
"eval_steps_per_second": 31.006, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 8.068665377176016e-06, |
|
"loss": 0.7185, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"eval_loss": 0.6859038472175598, |
|
"eval_runtime": 29.569, |
|
"eval_samples_per_second": 123.981, |
|
"eval_steps_per_second": 31.012, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 7.585106382978724e-06, |
|
"loss": 0.6611, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"eval_loss": 0.6883463263511658, |
|
"eval_runtime": 29.5377, |
|
"eval_samples_per_second": 124.113, |
|
"eval_steps_per_second": 31.045, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 7.101547388781432e-06, |
|
"loss": 0.6516, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"eval_loss": 0.7088809013366699, |
|
"eval_runtime": 29.1585, |
|
"eval_samples_per_second": 125.727, |
|
"eval_steps_per_second": 31.449, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 6.617988394584139e-06, |
|
"loss": 0.6386, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"eval_loss": 0.6939310431480408, |
|
"eval_runtime": 29.2346, |
|
"eval_samples_per_second": 125.399, |
|
"eval_steps_per_second": 31.367, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 6.134429400386848e-06, |
|
"loss": 0.6435, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"eval_loss": 0.6942200064659119, |
|
"eval_runtime": 30.0324, |
|
"eval_samples_per_second": 122.068, |
|
"eval_steps_per_second": 30.534, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 5.650870406189556e-06, |
|
"loss": 0.5823, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"eval_loss": 0.771172821521759, |
|
"eval_runtime": 29.4927, |
|
"eval_samples_per_second": 124.302, |
|
"eval_steps_per_second": 31.092, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 5.167311411992263e-06, |
|
"loss": 0.5652, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"eval_loss": 0.7315438985824585, |
|
"eval_runtime": 28.9938, |
|
"eval_samples_per_second": 126.441, |
|
"eval_steps_per_second": 31.627, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 4.683752417794971e-06, |
|
"loss": 0.5701, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"eval_loss": 0.7429590225219727, |
|
"eval_runtime": 29.2482, |
|
"eval_samples_per_second": 125.341, |
|
"eval_steps_per_second": 31.352, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 4.20019342359768e-06, |
|
"loss": 0.5583, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"eval_loss": 0.74286949634552, |
|
"eval_runtime": 28.9549, |
|
"eval_samples_per_second": 126.611, |
|
"eval_steps_per_second": 31.67, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 3.14, |
|
"learning_rate": 3.7176015473887818e-06, |
|
"loss": 0.5145, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 3.14, |
|
"eval_loss": 0.8086823225021362, |
|
"eval_runtime": 29.0547, |
|
"eval_samples_per_second": 126.176, |
|
"eval_steps_per_second": 31.561, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 3.38, |
|
"learning_rate": 3.23404255319149e-06, |
|
"loss": 0.4795, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 3.38, |
|
"eval_loss": 0.8026116490364075, |
|
"eval_runtime": 28.8514, |
|
"eval_samples_per_second": 127.065, |
|
"eval_steps_per_second": 31.784, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 3.63, |
|
"learning_rate": 2.7504835589941973e-06, |
|
"loss": 0.4784, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 3.63, |
|
"eval_loss": 0.7970021963119507, |
|
"eval_runtime": 28.8445, |
|
"eval_samples_per_second": 127.095, |
|
"eval_steps_per_second": 31.791, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 3.87, |
|
"learning_rate": 2.2669245647969055e-06, |
|
"loss": 0.4793, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 3.87, |
|
"eval_loss": 0.7971013784408569, |
|
"eval_runtime": 29.079, |
|
"eval_samples_per_second": 126.07, |
|
"eval_steps_per_second": 31.535, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 4.11, |
|
"learning_rate": 1.784332688588008e-06, |
|
"loss": 0.4544, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 4.11, |
|
"eval_loss": 0.8546438813209534, |
|
"eval_runtime": 29.1238, |
|
"eval_samples_per_second": 125.877, |
|
"eval_steps_per_second": 31.486, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 4.35, |
|
"learning_rate": 1.3007736943907157e-06, |
|
"loss": 0.4115, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 4.35, |
|
"eval_loss": 0.8430052995681763, |
|
"eval_runtime": 29.3455, |
|
"eval_samples_per_second": 124.926, |
|
"eval_steps_per_second": 31.248, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 4.59, |
|
"learning_rate": 8.172147001934237e-07, |
|
"loss": 0.4192, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 4.59, |
|
"eval_loss": 0.8553691506385803, |
|
"eval_runtime": 30.0888, |
|
"eval_samples_per_second": 121.839, |
|
"eval_steps_per_second": 30.476, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 4.84, |
|
"learning_rate": 3.3365570599613157e-07, |
|
"loss": 0.4101, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 4.84, |
|
"eval_loss": 0.863072395324707, |
|
"eval_runtime": 29.1167, |
|
"eval_samples_per_second": 125.907, |
|
"eval_steps_per_second": 31.494, |
|
"step": 10000 |
|
} |
|
], |
|
"max_steps": 10340, |
|
"num_train_epochs": 5, |
|
"total_flos": 9148295382909930.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|