bert-large-uncased-hi / trainer_state.json
shax's picture v1 bbe128e
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 5.0,
"global_step": 9565,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.05,
"eval_loss": 2.4315900802612305,
"eval_runtime": 4.6765,
"eval_samples_per_second": 171.069,
"eval_steps_per_second": 21.384,
"step": 100
},
{
"epoch": 0.1,
"eval_loss": 2.3032939434051514,
"eval_runtime": 4.6682,
"eval_samples_per_second": 171.373,
"eval_steps_per_second": 21.422,
"step": 200
},
{
"epoch": 0.16,
"eval_loss": 2.217759609222412,
"eval_runtime": 4.6679,
"eval_samples_per_second": 171.381,
"eval_steps_per_second": 21.423,
"step": 300
},
{
"epoch": 0.21,
"eval_loss": 2.14911150932312,
"eval_runtime": 4.6757,
"eval_samples_per_second": 171.099,
"eval_steps_per_second": 21.387,
"step": 400
},
{
"epoch": 0.26,
"eval_loss": 2.155996322631836,
"eval_runtime": 4.6665,
"eval_samples_per_second": 171.433,
"eval_steps_per_second": 21.429,
"step": 500
},
{
"epoch": 0.31,
"eval_loss": 2.01924991607666,
"eval_runtime": 4.6796,
"eval_samples_per_second": 170.956,
"eval_steps_per_second": 21.37,
"step": 600
},
{
"epoch": 0.37,
"eval_loss": 2.0464463233947754,
"eval_runtime": 4.6799,
"eval_samples_per_second": 170.944,
"eval_steps_per_second": 21.368,
"step": 700
},
{
"epoch": 0.42,
"eval_loss": 1.977142333984375,
"eval_runtime": 4.6707,
"eval_samples_per_second": 171.279,
"eval_steps_per_second": 21.41,
"step": 800
},
{
"epoch": 0.47,
"eval_loss": 1.967787504196167,
"eval_runtime": 4.6743,
"eval_samples_per_second": 171.148,
"eval_steps_per_second": 21.393,
"step": 900
},
{
"epoch": 0.52,
"learning_rate": 8.954521693674857e-06,
"loss": 2.2574,
"step": 1000
},
{
"epoch": 0.52,
"eval_loss": 1.9178065061569214,
"eval_runtime": 4.721,
"eval_samples_per_second": 169.457,
"eval_steps_per_second": 21.182,
"step": 1000
},
{
"epoch": 0.58,
"eval_loss": 1.8966461420059204,
"eval_runtime": 4.6821,
"eval_samples_per_second": 170.864,
"eval_steps_per_second": 21.358,
"step": 1100
},
{
"epoch": 0.63,
"eval_loss": 1.818562626838684,
"eval_runtime": 4.6827,
"eval_samples_per_second": 170.841,
"eval_steps_per_second": 21.355,
"step": 1200
},
{
"epoch": 0.68,
"eval_loss": 1.8336358070373535,
"eval_runtime": 4.6864,
"eval_samples_per_second": 170.708,
"eval_steps_per_second": 21.339,
"step": 1300
},
{
"epoch": 0.73,
"eval_loss": 1.8185982704162598,
"eval_runtime": 4.673,
"eval_samples_per_second": 171.196,
"eval_steps_per_second": 21.399,
"step": 1400
},
{
"epoch": 0.78,
"eval_loss": 1.8159518241882324,
"eval_runtime": 4.6851,
"eval_samples_per_second": 170.755,
"eval_steps_per_second": 21.344,
"step": 1500
},
{
"epoch": 0.84,
"eval_loss": 1.806492567062378,
"eval_runtime": 4.6846,
"eval_samples_per_second": 170.771,
"eval_steps_per_second": 21.346,
"step": 1600
},
{
"epoch": 0.89,
"eval_loss": 1.7608861923217773,
"eval_runtime": 4.6877,
"eval_samples_per_second": 170.659,
"eval_steps_per_second": 21.332,
"step": 1700
},
{
"epoch": 0.94,
"eval_loss": 1.7618434429168701,
"eval_runtime": 4.6769,
"eval_samples_per_second": 171.055,
"eval_steps_per_second": 21.382,
"step": 1800
},
{
"epoch": 0.99,
"eval_loss": 1.7498806715011597,
"eval_runtime": 4.6908,
"eval_samples_per_second": 170.546,
"eval_steps_per_second": 21.318,
"step": 1900
},
{
"epoch": 1.05,
"learning_rate": 7.909043387349713e-06,
"loss": 1.871,
"step": 2000
},
{
"epoch": 1.05,
"eval_loss": 1.816186547279358,
"eval_runtime": 4.7307,
"eval_samples_per_second": 169.107,
"eval_steps_per_second": 21.138,
"step": 2000
},
{
"epoch": 1.1,
"eval_loss": 1.7303450107574463,
"eval_runtime": 4.6832,
"eval_samples_per_second": 170.822,
"eval_steps_per_second": 21.353,
"step": 2100
},
{
"epoch": 1.15,
"eval_loss": 1.6595067977905273,
"eval_runtime": 4.6808,
"eval_samples_per_second": 170.912,
"eval_steps_per_second": 21.364,
"step": 2200
},
{
"epoch": 1.2,
"eval_loss": 1.7004770040512085,
"eval_runtime": 4.6859,
"eval_samples_per_second": 170.726,
"eval_steps_per_second": 21.341,
"step": 2300
},
{
"epoch": 1.25,
"eval_loss": 1.6547716856002808,
"eval_runtime": 4.6729,
"eval_samples_per_second": 171.199,
"eval_steps_per_second": 21.4,
"step": 2400
},
{
"epoch": 1.31,
"eval_loss": 1.6392831802368164,
"eval_runtime": 4.6758,
"eval_samples_per_second": 171.094,
"eval_steps_per_second": 21.387,
"step": 2500
},
{
"epoch": 1.36,
"eval_loss": 1.6230372190475464,
"eval_runtime": 4.6697,
"eval_samples_per_second": 171.318,
"eval_steps_per_second": 21.415,
"step": 2600
},
{
"epoch": 1.41,
"eval_loss": 1.63216233253479,
"eval_runtime": 4.6753,
"eval_samples_per_second": 171.111,
"eval_steps_per_second": 21.389,
"step": 2700
},
{
"epoch": 1.46,
"eval_loss": 1.627321720123291,
"eval_runtime": 4.6653,
"eval_samples_per_second": 171.477,
"eval_steps_per_second": 21.435,
"step": 2800
},
{
"epoch": 1.52,
"eval_loss": 1.6078392267227173,
"eval_runtime": 4.6673,
"eval_samples_per_second": 171.404,
"eval_steps_per_second": 21.425,
"step": 2900
},
{
"epoch": 1.57,
"learning_rate": 6.863565081024569e-06,
"loss": 1.7234,
"step": 3000
},
{
"epoch": 1.57,
"eval_loss": 1.6030551195144653,
"eval_runtime": 4.6964,
"eval_samples_per_second": 170.344,
"eval_steps_per_second": 21.293,
"step": 3000
},
{
"epoch": 1.62,
"eval_loss": 1.553316593170166,
"eval_runtime": 4.6663,
"eval_samples_per_second": 171.442,
"eval_steps_per_second": 21.43,
"step": 3100
},
{
"epoch": 1.67,
"eval_loss": 1.6135989427566528,
"eval_runtime": 4.6687,
"eval_samples_per_second": 171.355,
"eval_steps_per_second": 21.419,
"step": 3200
},
{
"epoch": 1.73,
"eval_loss": 1.5696121454238892,
"eval_runtime": 4.6764,
"eval_samples_per_second": 171.072,
"eval_steps_per_second": 21.384,
"step": 3300
},
{
"epoch": 1.78,
"eval_loss": 1.565152883529663,
"eval_runtime": 4.6699,
"eval_samples_per_second": 171.31,
"eval_steps_per_second": 21.414,
"step": 3400
},
{
"epoch": 1.83,
"eval_loss": 1.522884488105774,
"eval_runtime": 4.6578,
"eval_samples_per_second": 171.755,
"eval_steps_per_second": 21.469,
"step": 3500
},
{
"epoch": 1.88,
"eval_loss": 1.5707228183746338,
"eval_runtime": 4.6663,
"eval_samples_per_second": 171.44,
"eval_steps_per_second": 21.43,
"step": 3600
},
{
"epoch": 1.93,
"eval_loss": 1.587827444076538,
"eval_runtime": 4.6655,
"eval_samples_per_second": 171.473,
"eval_steps_per_second": 21.434,
"step": 3700
},
{
"epoch": 1.99,
"eval_loss": 1.5495318174362183,
"eval_runtime": 4.6751,
"eval_samples_per_second": 171.121,
"eval_steps_per_second": 21.39,
"step": 3800
},
{
"epoch": 2.04,
"eval_loss": 1.5380765199661255,
"eval_runtime": 4.6613,
"eval_samples_per_second": 171.627,
"eval_steps_per_second": 21.453,
"step": 3900
},
{
"epoch": 2.09,
"learning_rate": 5.8180867746994255e-06,
"loss": 1.611,
"step": 4000
},
{
"epoch": 2.09,
"eval_loss": 1.52095627784729,
"eval_runtime": 4.7023,
"eval_samples_per_second": 170.13,
"eval_steps_per_second": 21.266,
"step": 4000
},
{
"epoch": 2.14,
"eval_loss": 1.511513113975525,
"eval_runtime": 4.6667,
"eval_samples_per_second": 171.428,
"eval_steps_per_second": 21.428,
"step": 4100
},
{
"epoch": 2.2,
"eval_loss": 1.511332392692566,
"eval_runtime": 4.6592,
"eval_samples_per_second": 171.703,
"eval_steps_per_second": 21.463,
"step": 4200
},
{
"epoch": 2.25,
"eval_loss": 1.4714492559432983,
"eval_runtime": 4.6645,
"eval_samples_per_second": 171.508,
"eval_steps_per_second": 21.438,
"step": 4300
},
{
"epoch": 2.3,
"eval_loss": 1.5099194049835205,
"eval_runtime": 4.6633,
"eval_samples_per_second": 171.551,
"eval_steps_per_second": 21.444,
"step": 4400
},
{
"epoch": 2.35,
"eval_loss": 1.4862964153289795,
"eval_runtime": 4.6767,
"eval_samples_per_second": 171.061,
"eval_steps_per_second": 21.383,
"step": 4500
},
{
"epoch": 2.4,
"eval_loss": 1.5304350852966309,
"eval_runtime": 4.6709,
"eval_samples_per_second": 171.275,
"eval_steps_per_second": 21.409,
"step": 4600
},
{
"epoch": 2.46,
"eval_loss": 1.4990843534469604,
"eval_runtime": 4.6619,
"eval_samples_per_second": 171.605,
"eval_steps_per_second": 21.451,
"step": 4700
},
{
"epoch": 2.51,
"eval_loss": 1.5017355680465698,
"eval_runtime": 4.6598,
"eval_samples_per_second": 171.682,
"eval_steps_per_second": 21.46,
"step": 4800
},
{
"epoch": 2.56,
"eval_loss": 1.4134238958358765,
"eval_runtime": 4.6698,
"eval_samples_per_second": 171.313,
"eval_steps_per_second": 21.414,
"step": 4900
},
{
"epoch": 2.61,
"learning_rate": 4.7726084683742815e-06,
"loss": 1.5455,
"step": 5000
},
{
"epoch": 2.61,
"eval_loss": 1.4892077445983887,
"eval_runtime": 4.693,
"eval_samples_per_second": 170.468,
"eval_steps_per_second": 21.308,
"step": 5000
},
{
"epoch": 2.67,
"eval_loss": 1.4631962776184082,
"eval_runtime": 4.6718,
"eval_samples_per_second": 171.242,
"eval_steps_per_second": 21.405,
"step": 5100
},
{
"epoch": 2.72,
"eval_loss": 1.4686871767044067,
"eval_runtime": 4.6795,
"eval_samples_per_second": 170.959,
"eval_steps_per_second": 21.37,
"step": 5200
},
{
"epoch": 2.77,
"eval_loss": 1.444468379020691,
"eval_runtime": 4.6722,
"eval_samples_per_second": 171.227,
"eval_steps_per_second": 21.403,
"step": 5300
},
{
"epoch": 2.82,
"eval_loss": 1.434273362159729,
"eval_runtime": 4.6709,
"eval_samples_per_second": 171.273,
"eval_steps_per_second": 21.409,
"step": 5400
},
{
"epoch": 2.88,
"eval_loss": 1.4033972024917603,
"eval_runtime": 4.674,
"eval_samples_per_second": 171.159,
"eval_steps_per_second": 21.395,
"step": 5500
},
{
"epoch": 2.93,
"eval_loss": 1.3725674152374268,
"eval_runtime": 4.658,
"eval_samples_per_second": 171.746,
"eval_steps_per_second": 21.468,
"step": 5600
},
{
"epoch": 2.98,
"eval_loss": 1.4471670389175415,
"eval_runtime": 4.6706,
"eval_samples_per_second": 171.284,
"eval_steps_per_second": 21.411,
"step": 5700
},
{
"epoch": 3.03,
"eval_loss": 1.3971011638641357,
"eval_runtime": 4.6665,
"eval_samples_per_second": 171.433,
"eval_steps_per_second": 21.429,
"step": 5800
},
{
"epoch": 3.08,
"eval_loss": 1.4093689918518066,
"eval_runtime": 4.6837,
"eval_samples_per_second": 170.804,
"eval_steps_per_second": 21.351,
"step": 5900
},
{
"epoch": 3.14,
"learning_rate": 3.727130162049138e-06,
"loss": 1.4815,
"step": 6000
},
{
"epoch": 3.14,
"eval_loss": 1.434360384941101,
"eval_runtime": 4.7117,
"eval_samples_per_second": 169.79,
"eval_steps_per_second": 21.224,
"step": 6000
},
{
"epoch": 3.19,
"eval_loss": 1.412831425666809,
"eval_runtime": 4.678,
"eval_samples_per_second": 171.013,
"eval_steps_per_second": 21.377,
"step": 6100
},
{
"epoch": 3.24,
"eval_loss": 1.4168850183486938,
"eval_runtime": 4.6653,
"eval_samples_per_second": 171.478,
"eval_steps_per_second": 21.435,
"step": 6200
},
{
"epoch": 3.29,
"eval_loss": 1.399338960647583,
"eval_runtime": 4.6686,
"eval_samples_per_second": 171.359,
"eval_steps_per_second": 21.42,
"step": 6300
},
{
"epoch": 3.35,
"eval_loss": 1.4034981727600098,
"eval_runtime": 4.6638,
"eval_samples_per_second": 171.535,
"eval_steps_per_second": 21.442,
"step": 6400
},
{
"epoch": 3.4,
"eval_loss": 1.3951754570007324,
"eval_runtime": 4.6788,
"eval_samples_per_second": 170.982,
"eval_steps_per_second": 21.373,
"step": 6500
},
{
"epoch": 3.45,
"eval_loss": 1.3877923488616943,
"eval_runtime": 4.6855,
"eval_samples_per_second": 170.739,
"eval_steps_per_second": 21.342,
"step": 6600
},
{
"epoch": 3.5,
"eval_loss": 1.3673046827316284,
"eval_runtime": 4.6699,
"eval_samples_per_second": 171.31,
"eval_steps_per_second": 21.414,
"step": 6700
},
{
"epoch": 3.55,
"eval_loss": 1.3622443675994873,
"eval_runtime": 4.671,
"eval_samples_per_second": 171.271,
"eval_steps_per_second": 21.409,
"step": 6800
},
{
"epoch": 3.61,
"eval_loss": 1.374872088432312,
"eval_runtime": 4.6712,
"eval_samples_per_second": 171.263,
"eval_steps_per_second": 21.408,
"step": 6900
},
{
"epoch": 3.66,
"learning_rate": 2.681651855723994e-06,
"loss": 1.4492,
"step": 7000
},
{
"epoch": 3.66,
"eval_loss": 1.4481711387634277,
"eval_runtime": 4.6991,
"eval_samples_per_second": 170.247,
"eval_steps_per_second": 21.281,
"step": 7000
},
{
"epoch": 3.71,
"eval_loss": 1.3453810214996338,
"eval_runtime": 4.6891,
"eval_samples_per_second": 170.607,
"eval_steps_per_second": 21.326,
"step": 7100
},
{
"epoch": 3.76,
"eval_loss": 1.3628772497177124,
"eval_runtime": 4.691,
"eval_samples_per_second": 170.54,
"eval_steps_per_second": 21.317,
"step": 7200
},
{
"epoch": 3.82,
"eval_loss": 1.4115179777145386,
"eval_runtime": 4.6842,
"eval_samples_per_second": 170.787,
"eval_steps_per_second": 21.348,
"step": 7300
},
{
"epoch": 3.87,
"eval_loss": 1.3432800769805908,
"eval_runtime": 4.69,
"eval_samples_per_second": 170.576,
"eval_steps_per_second": 21.322,
"step": 7400
},
{
"epoch": 3.92,
"eval_loss": 1.368696928024292,
"eval_runtime": 4.6923,
"eval_samples_per_second": 170.493,
"eval_steps_per_second": 21.312,
"step": 7500
},
{
"epoch": 3.97,
"eval_loss": 1.4239104986190796,
"eval_runtime": 4.6873,
"eval_samples_per_second": 170.674,
"eval_steps_per_second": 21.334,
"step": 7600
},
{
"epoch": 4.03,
"eval_loss": 1.3071486949920654,
"eval_runtime": 4.6827,
"eval_samples_per_second": 170.843,
"eval_steps_per_second": 21.355,
"step": 7700
},
{
"epoch": 4.08,
"eval_loss": 1.353852391242981,
"eval_runtime": 4.6839,
"eval_samples_per_second": 170.798,
"eval_steps_per_second": 21.35,
"step": 7800
},
{
"epoch": 4.13,
"eval_loss": 1.3432263135910034,
"eval_runtime": 4.6946,
"eval_samples_per_second": 170.409,
"eval_steps_per_second": 21.301,
"step": 7900
},
{
"epoch": 4.18,
"learning_rate": 1.63617354939885e-06,
"loss": 1.4208,
"step": 8000
},
{
"epoch": 4.18,
"eval_loss": 1.3502365350723267,
"eval_runtime": 4.7172,
"eval_samples_per_second": 169.592,
"eval_steps_per_second": 21.199,
"step": 8000
},
{
"epoch": 4.23,
"eval_loss": 1.363999605178833,
"eval_runtime": 4.685,
"eval_samples_per_second": 170.758,
"eval_steps_per_second": 21.345,
"step": 8100
},
{
"epoch": 4.29,
"eval_loss": 1.382441520690918,
"eval_runtime": 4.6847,
"eval_samples_per_second": 170.769,
"eval_steps_per_second": 21.346,
"step": 8200
},
{
"epoch": 4.34,
"eval_loss": 1.3818987607955933,
"eval_runtime": 4.6986,
"eval_samples_per_second": 170.265,
"eval_steps_per_second": 21.283,
"step": 8300
},
{
"epoch": 4.39,
"eval_loss": 1.340713620185852,
"eval_runtime": 4.6878,
"eval_samples_per_second": 170.655,
"eval_steps_per_second": 21.332,
"step": 8400
},
{
"epoch": 4.44,
"eval_loss": 1.3344806432724,
"eval_runtime": 4.6958,
"eval_samples_per_second": 170.364,
"eval_steps_per_second": 21.295,
"step": 8500
},
{
"epoch": 4.5,
"eval_loss": 1.3563097715377808,
"eval_runtime": 4.6877,
"eval_samples_per_second": 170.66,
"eval_steps_per_second": 21.333,
"step": 8600
},
{
"epoch": 4.55,
"eval_loss": 1.3303130865097046,
"eval_runtime": 4.6903,
"eval_samples_per_second": 170.565,
"eval_steps_per_second": 21.321,
"step": 8700
},
{
"epoch": 4.6,
"eval_loss": 1.3454128503799438,
"eval_runtime": 4.6828,
"eval_samples_per_second": 170.838,
"eval_steps_per_second": 21.355,
"step": 8800
},
{
"epoch": 4.65,
"eval_loss": 1.3808575868606567,
"eval_runtime": 4.6966,
"eval_samples_per_second": 170.335,
"eval_steps_per_second": 21.292,
"step": 8900
},
{
"epoch": 4.7,
"learning_rate": 5.906952430737063e-07,
"loss": 1.4013,
"step": 9000
},
{
"epoch": 4.7,
"eval_loss": 1.3647897243499756,
"eval_runtime": 4.7177,
"eval_samples_per_second": 169.575,
"eval_steps_per_second": 21.197,
"step": 9000
},
{
"epoch": 4.76,
"eval_loss": 1.34979248046875,
"eval_runtime": 4.6998,
"eval_samples_per_second": 170.221,
"eval_steps_per_second": 21.278,
"step": 9100
},
{
"epoch": 4.81,
"eval_loss": 1.3509832620620728,
"eval_runtime": 4.685,
"eval_samples_per_second": 170.758,
"eval_steps_per_second": 21.345,
"step": 9200
},
{
"epoch": 4.86,
"eval_loss": 1.3402700424194336,
"eval_runtime": 4.6979,
"eval_samples_per_second": 170.289,
"eval_steps_per_second": 21.286,
"step": 9300
},
{
"epoch": 4.91,
"eval_loss": 1.2718613147735596,
"eval_runtime": 4.6876,
"eval_samples_per_second": 170.662,
"eval_steps_per_second": 21.333,
"step": 9400
},
{
"epoch": 4.97,
"eval_loss": 1.3347008228302002,
"eval_runtime": 4.6849,
"eval_samples_per_second": 170.762,
"eval_steps_per_second": 21.345,
"step": 9500
},
{
"epoch": 5.0,
"step": 9565,
"total_flos": 1.1622564417918384e+16,
"train_loss": 1.626130136269521,
"train_runtime": 5849.5407,
"train_samples_per_second": 13.078,
"train_steps_per_second": 1.635
}
],
"max_steps": 9565,
"num_train_epochs": 5,
"total_flos": 1.1622564417918384e+16,
"trial_name": null,
"trial_params": null
}