|
{ |
|
"best_metric": 0.9335728010062837, |
|
"best_model_checkpoint": "/scicore/home/lauerg/cerque0000/models/nlp-job-ads/gbert-base-ft-edu-redux/checkpoint-1400", |
|
"epoch": 9.0, |
|
"global_step": 2664, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.34, |
|
"eval_loss": 0.31717467308044434, |
|
"eval_lrap": 0.575480030593511, |
|
"eval_runtime": 8.2282, |
|
"eval_samples_per_second": 81.671, |
|
"eval_steps_per_second": 10.209, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"eval_loss": 0.27608808875083923, |
|
"eval_lrap": 0.7146675513885474, |
|
"eval_runtime": 8.1478, |
|
"eval_samples_per_second": 82.476, |
|
"eval_steps_per_second": 10.31, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"eval_loss": 0.24148814380168915, |
|
"eval_lrap": 0.8075810415317497, |
|
"eval_runtime": 8.0351, |
|
"eval_samples_per_second": 83.633, |
|
"eval_steps_per_second": 10.454, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"eval_loss": 0.2186277061700821, |
|
"eval_lrap": 0.825712521334449, |
|
"eval_runtime": 8.0683, |
|
"eval_samples_per_second": 83.289, |
|
"eval_steps_per_second": 10.411, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 2.193243243243243e-05, |
|
"loss": 0.2693, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"eval_loss": 0.21280047297477722, |
|
"eval_lrap": 0.8625814785205315, |
|
"eval_runtime": 8.196, |
|
"eval_samples_per_second": 81.991, |
|
"eval_steps_per_second": 10.249, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"eval_loss": 0.20358851552009583, |
|
"eval_lrap": 0.8505382989284863, |
|
"eval_runtime": 8.1912, |
|
"eval_samples_per_second": 82.039, |
|
"eval_steps_per_second": 10.255, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"eval_loss": 0.21679966151714325, |
|
"eval_lrap": 0.8932179201649483, |
|
"eval_runtime": 8.0797, |
|
"eval_samples_per_second": 83.172, |
|
"eval_steps_per_second": 10.396, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"eval_loss": 0.18326468765735626, |
|
"eval_lrap": 0.903219248223994, |
|
"eval_runtime": 8.1375, |
|
"eval_samples_per_second": 82.58, |
|
"eval_steps_per_second": 10.323, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 3.04, |
|
"eval_loss": 0.18310672044754028, |
|
"eval_lrap": 0.8821981117114492, |
|
"eval_runtime": 8.0697, |
|
"eval_samples_per_second": 83.274, |
|
"eval_steps_per_second": 10.409, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 3.38, |
|
"learning_rate": 1.6864864864864864e-05, |
|
"loss": 0.1665, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 3.38, |
|
"eval_loss": 0.1836722493171692, |
|
"eval_lrap": 0.9224537287097426, |
|
"eval_runtime": 8.1462, |
|
"eval_samples_per_second": 82.493, |
|
"eval_steps_per_second": 10.312, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 3.72, |
|
"eval_loss": 0.1867215484380722, |
|
"eval_lrap": 0.9292872160328117, |
|
"eval_runtime": 8.1758, |
|
"eval_samples_per_second": 82.194, |
|
"eval_steps_per_second": 10.274, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 4.05, |
|
"eval_loss": 0.17954795062541962, |
|
"eval_lrap": 0.9233198336105406, |
|
"eval_runtime": 8.1628, |
|
"eval_samples_per_second": 82.325, |
|
"eval_steps_per_second": 10.291, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 4.39, |
|
"eval_loss": 0.1790640652179718, |
|
"eval_lrap": 0.9050188553107904, |
|
"eval_runtime": 8.1622, |
|
"eval_samples_per_second": 82.331, |
|
"eval_steps_per_second": 10.291, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 4.73, |
|
"eval_loss": 0.18696065247058868, |
|
"eval_lrap": 0.9335728010062837, |
|
"eval_runtime": 8.1263, |
|
"eval_samples_per_second": 82.694, |
|
"eval_steps_per_second": 10.337, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 5.07, |
|
"learning_rate": 1.1797297297297297e-05, |
|
"loss": 0.1146, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 5.07, |
|
"eval_loss": 0.1917356699705124, |
|
"eval_lrap": 0.9100622279217009, |
|
"eval_runtime": 8.1497, |
|
"eval_samples_per_second": 82.457, |
|
"eval_steps_per_second": 10.307, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 5.41, |
|
"eval_loss": 0.17960630357265472, |
|
"eval_lrap": 0.9317027532386741, |
|
"eval_runtime": 8.1448, |
|
"eval_samples_per_second": 82.507, |
|
"eval_steps_per_second": 10.313, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 5.74, |
|
"eval_loss": 0.18476144969463348, |
|
"eval_lrap": 0.9179141722192571, |
|
"eval_runtime": 8.0555, |
|
"eval_samples_per_second": 83.422, |
|
"eval_steps_per_second": 10.428, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 6.08, |
|
"eval_loss": 0.18720324337482452, |
|
"eval_lrap": 0.9076720128841897, |
|
"eval_runtime": 8.1385, |
|
"eval_samples_per_second": 82.571, |
|
"eval_steps_per_second": 10.321, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 6.42, |
|
"eval_loss": 0.18699432909488678, |
|
"eval_lrap": 0.924198462000043, |
|
"eval_runtime": 8.1746, |
|
"eval_samples_per_second": 82.206, |
|
"eval_steps_per_second": 10.276, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 6.76, |
|
"learning_rate": 6.729729729729729e-06, |
|
"loss": 0.0813, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 6.76, |
|
"eval_loss": 0.17621222138404846, |
|
"eval_lrap": 0.915026297261488, |
|
"eval_runtime": 8.0568, |
|
"eval_samples_per_second": 83.408, |
|
"eval_steps_per_second": 10.426, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 7.09, |
|
"eval_loss": 0.18799513578414917, |
|
"eval_lrap": 0.9157933801078312, |
|
"eval_runtime": 8.2, |
|
"eval_samples_per_second": 81.951, |
|
"eval_steps_per_second": 10.244, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 7.43, |
|
"eval_loss": 0.1881764978170395, |
|
"eval_lrap": 0.9201832052904669, |
|
"eval_runtime": 8.1918, |
|
"eval_samples_per_second": 82.034, |
|
"eval_steps_per_second": 10.254, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 7.77, |
|
"eval_loss": 0.1911892592906952, |
|
"eval_lrap": 0.9248369082537248, |
|
"eval_runtime": 8.0575, |
|
"eval_samples_per_second": 83.401, |
|
"eval_steps_per_second": 10.425, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 8.11, |
|
"eval_loss": 0.18784251809120178, |
|
"eval_lrap": 0.9253503927004727, |
|
"eval_runtime": 8.1128, |
|
"eval_samples_per_second": 82.832, |
|
"eval_steps_per_second": 10.354, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 8.45, |
|
"learning_rate": 1.662162162162162e-06, |
|
"loss": 0.0624, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 8.45, |
|
"eval_loss": 0.19267971813678741, |
|
"eval_lrap": 0.9211606593694309, |
|
"eval_runtime": 8.201, |
|
"eval_samples_per_second": 81.942, |
|
"eval_steps_per_second": 10.243, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 8.78, |
|
"eval_loss": 0.1919146031141281, |
|
"eval_lrap": 0.9207750746085017, |
|
"eval_runtime": 8.267, |
|
"eval_samples_per_second": 81.287, |
|
"eval_steps_per_second": 10.161, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"step": 2664, |
|
"total_flos": 5605306439961600.0, |
|
"train_loss": 0.1333099156170636, |
|
"train_runtime": 980.3632, |
|
"train_samples_per_second": 21.73, |
|
"train_steps_per_second": 2.717 |
|
} |
|
], |
|
"max_steps": 2664, |
|
"num_train_epochs": 9, |
|
"total_flos": 5605306439961600.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|