{ "best_metric": 0.9335728010062837, "best_model_checkpoint": "/scicore/home/lauerg/cerque0000/models/nlp-job-ads/gbert-base-ft-edu-redux/checkpoint-1400", "epoch": 9.0, "global_step": 2664, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.34, "eval_loss": 0.31717467308044434, "eval_lrap": 0.575480030593511, "eval_runtime": 8.2282, "eval_samples_per_second": 81.671, "eval_steps_per_second": 10.209, "step": 100 }, { "epoch": 0.68, "eval_loss": 0.27608808875083923, "eval_lrap": 0.7146675513885474, "eval_runtime": 8.1478, "eval_samples_per_second": 82.476, "eval_steps_per_second": 10.31, "step": 200 }, { "epoch": 1.01, "eval_loss": 0.24148814380168915, "eval_lrap": 0.8075810415317497, "eval_runtime": 8.0351, "eval_samples_per_second": 83.633, "eval_steps_per_second": 10.454, "step": 300 }, { "epoch": 1.35, "eval_loss": 0.2186277061700821, "eval_lrap": 0.825712521334449, "eval_runtime": 8.0683, "eval_samples_per_second": 83.289, "eval_steps_per_second": 10.411, "step": 400 }, { "epoch": 1.69, "learning_rate": 2.193243243243243e-05, "loss": 0.2693, "step": 500 }, { "epoch": 1.69, "eval_loss": 0.21280047297477722, "eval_lrap": 0.8625814785205315, "eval_runtime": 8.196, "eval_samples_per_second": 81.991, "eval_steps_per_second": 10.249, "step": 500 }, { "epoch": 2.03, "eval_loss": 0.20358851552009583, "eval_lrap": 0.8505382989284863, "eval_runtime": 8.1912, "eval_samples_per_second": 82.039, "eval_steps_per_second": 10.255, "step": 600 }, { "epoch": 2.36, "eval_loss": 0.21679966151714325, "eval_lrap": 0.8932179201649483, "eval_runtime": 8.0797, "eval_samples_per_second": 83.172, "eval_steps_per_second": 10.396, "step": 700 }, { "epoch": 2.7, "eval_loss": 0.18326468765735626, "eval_lrap": 0.903219248223994, "eval_runtime": 8.1375, "eval_samples_per_second": 82.58, "eval_steps_per_second": 10.323, "step": 800 }, { "epoch": 3.04, "eval_loss": 0.18310672044754028, "eval_lrap": 0.8821981117114492, "eval_runtime": 8.0697, "eval_samples_per_second": 83.274, "eval_steps_per_second": 10.409, "step": 900 }, { "epoch": 3.38, "learning_rate": 1.6864864864864864e-05, "loss": 0.1665, "step": 1000 }, { "epoch": 3.38, "eval_loss": 0.1836722493171692, "eval_lrap": 0.9224537287097426, "eval_runtime": 8.1462, "eval_samples_per_second": 82.493, "eval_steps_per_second": 10.312, "step": 1000 }, { "epoch": 3.72, "eval_loss": 0.1867215484380722, "eval_lrap": 0.9292872160328117, "eval_runtime": 8.1758, "eval_samples_per_second": 82.194, "eval_steps_per_second": 10.274, "step": 1100 }, { "epoch": 4.05, "eval_loss": 0.17954795062541962, "eval_lrap": 0.9233198336105406, "eval_runtime": 8.1628, "eval_samples_per_second": 82.325, "eval_steps_per_second": 10.291, "step": 1200 }, { "epoch": 4.39, "eval_loss": 0.1790640652179718, "eval_lrap": 0.9050188553107904, "eval_runtime": 8.1622, "eval_samples_per_second": 82.331, "eval_steps_per_second": 10.291, "step": 1300 }, { "epoch": 4.73, "eval_loss": 0.18696065247058868, "eval_lrap": 0.9335728010062837, "eval_runtime": 8.1263, "eval_samples_per_second": 82.694, "eval_steps_per_second": 10.337, "step": 1400 }, { "epoch": 5.07, "learning_rate": 1.1797297297297297e-05, "loss": 0.1146, "step": 1500 }, { "epoch": 5.07, "eval_loss": 0.1917356699705124, "eval_lrap": 0.9100622279217009, "eval_runtime": 8.1497, "eval_samples_per_second": 82.457, "eval_steps_per_second": 10.307, "step": 1500 }, { "epoch": 5.41, "eval_loss": 0.17960630357265472, "eval_lrap": 0.9317027532386741, "eval_runtime": 8.1448, "eval_samples_per_second": 82.507, "eval_steps_per_second": 10.313, "step": 1600 }, { "epoch": 5.74, "eval_loss": 0.18476144969463348, "eval_lrap": 0.9179141722192571, "eval_runtime": 8.0555, "eval_samples_per_second": 83.422, "eval_steps_per_second": 10.428, "step": 1700 }, { "epoch": 6.08, "eval_loss": 0.18720324337482452, "eval_lrap": 0.9076720128841897, "eval_runtime": 8.1385, "eval_samples_per_second": 82.571, "eval_steps_per_second": 10.321, "step": 1800 }, { "epoch": 6.42, "eval_loss": 0.18699432909488678, "eval_lrap": 0.924198462000043, "eval_runtime": 8.1746, "eval_samples_per_second": 82.206, "eval_steps_per_second": 10.276, "step": 1900 }, { "epoch": 6.76, "learning_rate": 6.729729729729729e-06, "loss": 0.0813, "step": 2000 }, { "epoch": 6.76, "eval_loss": 0.17621222138404846, "eval_lrap": 0.915026297261488, "eval_runtime": 8.0568, "eval_samples_per_second": 83.408, "eval_steps_per_second": 10.426, "step": 2000 }, { "epoch": 7.09, "eval_loss": 0.18799513578414917, "eval_lrap": 0.9157933801078312, "eval_runtime": 8.2, "eval_samples_per_second": 81.951, "eval_steps_per_second": 10.244, "step": 2100 }, { "epoch": 7.43, "eval_loss": 0.1881764978170395, "eval_lrap": 0.9201832052904669, "eval_runtime": 8.1918, "eval_samples_per_second": 82.034, "eval_steps_per_second": 10.254, "step": 2200 }, { "epoch": 7.77, "eval_loss": 0.1911892592906952, "eval_lrap": 0.9248369082537248, "eval_runtime": 8.0575, "eval_samples_per_second": 83.401, "eval_steps_per_second": 10.425, "step": 2300 }, { "epoch": 8.11, "eval_loss": 0.18784251809120178, "eval_lrap": 0.9253503927004727, "eval_runtime": 8.1128, "eval_samples_per_second": 82.832, "eval_steps_per_second": 10.354, "step": 2400 }, { "epoch": 8.45, "learning_rate": 1.662162162162162e-06, "loss": 0.0624, "step": 2500 }, { "epoch": 8.45, "eval_loss": 0.19267971813678741, "eval_lrap": 0.9211606593694309, "eval_runtime": 8.201, "eval_samples_per_second": 81.942, "eval_steps_per_second": 10.243, "step": 2500 }, { "epoch": 8.78, "eval_loss": 0.1919146031141281, "eval_lrap": 0.9207750746085017, "eval_runtime": 8.267, "eval_samples_per_second": 81.287, "eval_steps_per_second": 10.161, "step": 2600 }, { "epoch": 9.0, "step": 2664, "total_flos": 5605306439961600.0, "train_loss": 0.1333099156170636, "train_runtime": 980.3632, "train_samples_per_second": 21.73, "train_steps_per_second": 2.717 } ], "max_steps": 2664, "num_train_epochs": 9, "total_flos": 5605306439961600.0, "trial_name": null, "trial_params": null }