gbert-base-ft-edu-redux / trainer_state.json
gonzpen's picture
Upload trainer_state.json
a5a4f63
raw
history blame
7.56 kB
{
"best_metric": 0.9335728010062837,
"best_model_checkpoint": "/scicore/home/lauerg/cerque0000/models/nlp-job-ads/gbert-base-ft-edu-redux/checkpoint-1400",
"epoch": 9.0,
"global_step": 2664,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.34,
"eval_loss": 0.31717467308044434,
"eval_lrap": 0.575480030593511,
"eval_runtime": 8.2282,
"eval_samples_per_second": 81.671,
"eval_steps_per_second": 10.209,
"step": 100
},
{
"epoch": 0.68,
"eval_loss": 0.27608808875083923,
"eval_lrap": 0.7146675513885474,
"eval_runtime": 8.1478,
"eval_samples_per_second": 82.476,
"eval_steps_per_second": 10.31,
"step": 200
},
{
"epoch": 1.01,
"eval_loss": 0.24148814380168915,
"eval_lrap": 0.8075810415317497,
"eval_runtime": 8.0351,
"eval_samples_per_second": 83.633,
"eval_steps_per_second": 10.454,
"step": 300
},
{
"epoch": 1.35,
"eval_loss": 0.2186277061700821,
"eval_lrap": 0.825712521334449,
"eval_runtime": 8.0683,
"eval_samples_per_second": 83.289,
"eval_steps_per_second": 10.411,
"step": 400
},
{
"epoch": 1.69,
"learning_rate": 2.193243243243243e-05,
"loss": 0.2693,
"step": 500
},
{
"epoch": 1.69,
"eval_loss": 0.21280047297477722,
"eval_lrap": 0.8625814785205315,
"eval_runtime": 8.196,
"eval_samples_per_second": 81.991,
"eval_steps_per_second": 10.249,
"step": 500
},
{
"epoch": 2.03,
"eval_loss": 0.20358851552009583,
"eval_lrap": 0.8505382989284863,
"eval_runtime": 8.1912,
"eval_samples_per_second": 82.039,
"eval_steps_per_second": 10.255,
"step": 600
},
{
"epoch": 2.36,
"eval_loss": 0.21679966151714325,
"eval_lrap": 0.8932179201649483,
"eval_runtime": 8.0797,
"eval_samples_per_second": 83.172,
"eval_steps_per_second": 10.396,
"step": 700
},
{
"epoch": 2.7,
"eval_loss": 0.18326468765735626,
"eval_lrap": 0.903219248223994,
"eval_runtime": 8.1375,
"eval_samples_per_second": 82.58,
"eval_steps_per_second": 10.323,
"step": 800
},
{
"epoch": 3.04,
"eval_loss": 0.18310672044754028,
"eval_lrap": 0.8821981117114492,
"eval_runtime": 8.0697,
"eval_samples_per_second": 83.274,
"eval_steps_per_second": 10.409,
"step": 900
},
{
"epoch": 3.38,
"learning_rate": 1.6864864864864864e-05,
"loss": 0.1665,
"step": 1000
},
{
"epoch": 3.38,
"eval_loss": 0.1836722493171692,
"eval_lrap": 0.9224537287097426,
"eval_runtime": 8.1462,
"eval_samples_per_second": 82.493,
"eval_steps_per_second": 10.312,
"step": 1000
},
{
"epoch": 3.72,
"eval_loss": 0.1867215484380722,
"eval_lrap": 0.9292872160328117,
"eval_runtime": 8.1758,
"eval_samples_per_second": 82.194,
"eval_steps_per_second": 10.274,
"step": 1100
},
{
"epoch": 4.05,
"eval_loss": 0.17954795062541962,
"eval_lrap": 0.9233198336105406,
"eval_runtime": 8.1628,
"eval_samples_per_second": 82.325,
"eval_steps_per_second": 10.291,
"step": 1200
},
{
"epoch": 4.39,
"eval_loss": 0.1790640652179718,
"eval_lrap": 0.9050188553107904,
"eval_runtime": 8.1622,
"eval_samples_per_second": 82.331,
"eval_steps_per_second": 10.291,
"step": 1300
},
{
"epoch": 4.73,
"eval_loss": 0.18696065247058868,
"eval_lrap": 0.9335728010062837,
"eval_runtime": 8.1263,
"eval_samples_per_second": 82.694,
"eval_steps_per_second": 10.337,
"step": 1400
},
{
"epoch": 5.07,
"learning_rate": 1.1797297297297297e-05,
"loss": 0.1146,
"step": 1500
},
{
"epoch": 5.07,
"eval_loss": 0.1917356699705124,
"eval_lrap": 0.9100622279217009,
"eval_runtime": 8.1497,
"eval_samples_per_second": 82.457,
"eval_steps_per_second": 10.307,
"step": 1500
},
{
"epoch": 5.41,
"eval_loss": 0.17960630357265472,
"eval_lrap": 0.9317027532386741,
"eval_runtime": 8.1448,
"eval_samples_per_second": 82.507,
"eval_steps_per_second": 10.313,
"step": 1600
},
{
"epoch": 5.74,
"eval_loss": 0.18476144969463348,
"eval_lrap": 0.9179141722192571,
"eval_runtime": 8.0555,
"eval_samples_per_second": 83.422,
"eval_steps_per_second": 10.428,
"step": 1700
},
{
"epoch": 6.08,
"eval_loss": 0.18720324337482452,
"eval_lrap": 0.9076720128841897,
"eval_runtime": 8.1385,
"eval_samples_per_second": 82.571,
"eval_steps_per_second": 10.321,
"step": 1800
},
{
"epoch": 6.42,
"eval_loss": 0.18699432909488678,
"eval_lrap": 0.924198462000043,
"eval_runtime": 8.1746,
"eval_samples_per_second": 82.206,
"eval_steps_per_second": 10.276,
"step": 1900
},
{
"epoch": 6.76,
"learning_rate": 6.729729729729729e-06,
"loss": 0.0813,
"step": 2000
},
{
"epoch": 6.76,
"eval_loss": 0.17621222138404846,
"eval_lrap": 0.915026297261488,
"eval_runtime": 8.0568,
"eval_samples_per_second": 83.408,
"eval_steps_per_second": 10.426,
"step": 2000
},
{
"epoch": 7.09,
"eval_loss": 0.18799513578414917,
"eval_lrap": 0.9157933801078312,
"eval_runtime": 8.2,
"eval_samples_per_second": 81.951,
"eval_steps_per_second": 10.244,
"step": 2100
},
{
"epoch": 7.43,
"eval_loss": 0.1881764978170395,
"eval_lrap": 0.9201832052904669,
"eval_runtime": 8.1918,
"eval_samples_per_second": 82.034,
"eval_steps_per_second": 10.254,
"step": 2200
},
{
"epoch": 7.77,
"eval_loss": 0.1911892592906952,
"eval_lrap": 0.9248369082537248,
"eval_runtime": 8.0575,
"eval_samples_per_second": 83.401,
"eval_steps_per_second": 10.425,
"step": 2300
},
{
"epoch": 8.11,
"eval_loss": 0.18784251809120178,
"eval_lrap": 0.9253503927004727,
"eval_runtime": 8.1128,
"eval_samples_per_second": 82.832,
"eval_steps_per_second": 10.354,
"step": 2400
},
{
"epoch": 8.45,
"learning_rate": 1.662162162162162e-06,
"loss": 0.0624,
"step": 2500
},
{
"epoch": 8.45,
"eval_loss": 0.19267971813678741,
"eval_lrap": 0.9211606593694309,
"eval_runtime": 8.201,
"eval_samples_per_second": 81.942,
"eval_steps_per_second": 10.243,
"step": 2500
},
{
"epoch": 8.78,
"eval_loss": 0.1919146031141281,
"eval_lrap": 0.9207750746085017,
"eval_runtime": 8.267,
"eval_samples_per_second": 81.287,
"eval_steps_per_second": 10.161,
"step": 2600
},
{
"epoch": 9.0,
"step": 2664,
"total_flos": 5605306439961600.0,
"train_loss": 0.1333099156170636,
"train_runtime": 980.3632,
"train_samples_per_second": 21.73,
"train_steps_per_second": 2.717
}
],
"max_steps": 2664,
"num_train_epochs": 9,
"total_flos": 5605306439961600.0,
"trial_name": null,
"trial_params": null
}