kozh_xlsr_run1 / checkpoint-199611 /trainer_state.json
yesj1234's picture
Upload folder using huggingface_hub
9966f94
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 9.0,
"eval_steps": 500,
"global_step": 199611,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.03,
"learning_rate": 0.00011999999999999999,
"loss": 18.2932,
"step": 600
},
{
"epoch": 0.05,
"learning_rate": 0.00023999999999999998,
"loss": 4.6022,
"step": 1200
},
{
"epoch": 0.08,
"learning_rate": 0.00029959144763720545,
"loss": 2.9785,
"step": 1800
},
{
"epoch": 0.11,
"learning_rate": 0.0002987743429116165,
"loss": 2.065,
"step": 2400
},
{
"epoch": 0.14,
"learning_rate": 0.0002979572381860275,
"loss": 1.7581,
"step": 3000
},
{
"epoch": 0.16,
"learning_rate": 0.0002971401334604385,
"loss": 1.6367,
"step": 3600
},
{
"epoch": 0.19,
"learning_rate": 0.0002963230287348495,
"loss": 1.5326,
"step": 4200
},
{
"epoch": 0.22,
"learning_rate": 0.0002955059240092605,
"loss": 1.4634,
"step": 4800
},
{
"epoch": 0.24,
"learning_rate": 0.0002946888192836715,
"loss": 1.4078,
"step": 5400
},
{
"epoch": 0.27,
"learning_rate": 0.0002938717145580825,
"loss": 1.385,
"step": 6000
},
{
"epoch": 0.3,
"learning_rate": 0.0002930546098324935,
"loss": 1.3197,
"step": 6600
},
{
"epoch": 0.32,
"learning_rate": 0.00029223750510690453,
"loss": 1.3254,
"step": 7200
},
{
"epoch": 0.35,
"learning_rate": 0.00029142040038131553,
"loss": 1.3057,
"step": 7800
},
{
"epoch": 0.38,
"learning_rate": 0.0002906032956557265,
"loss": 1.2723,
"step": 8400
},
{
"epoch": 0.41,
"learning_rate": 0.0002897861909301375,
"loss": 1.2535,
"step": 9000
},
{
"epoch": 0.43,
"learning_rate": 0.0002889690862045485,
"loss": 1.2308,
"step": 9600
},
{
"epoch": 0.46,
"learning_rate": 0.0002881519814789595,
"loss": 1.2168,
"step": 10200
},
{
"epoch": 0.49,
"learning_rate": 0.0002873348767533705,
"loss": 1.2023,
"step": 10800
},
{
"epoch": 0.51,
"learning_rate": 0.00028651777202778157,
"loss": 1.2041,
"step": 11400
},
{
"epoch": 0.54,
"learning_rate": 0.00028570066730219257,
"loss": 1.1883,
"step": 12000
},
{
"epoch": 0.57,
"learning_rate": 0.0002848835625766035,
"loss": 1.1623,
"step": 12600
},
{
"epoch": 0.6,
"learning_rate": 0.00028406645785101456,
"loss": 1.1844,
"step": 13200
},
{
"epoch": 0.62,
"learning_rate": 0.00028324935312542556,
"loss": 1.1347,
"step": 13800
},
{
"epoch": 0.65,
"learning_rate": 0.00028243224839983656,
"loss": 1.1252,
"step": 14400
},
{
"epoch": 0.68,
"learning_rate": 0.00028161514367424755,
"loss": 1.1061,
"step": 15000
},
{
"epoch": 0.7,
"learning_rate": 0.00028079803894865855,
"loss": 1.1354,
"step": 15600
},
{
"epoch": 0.73,
"learning_rate": 0.00027998093422306955,
"loss": 1.1211,
"step": 16200
},
{
"epoch": 0.76,
"learning_rate": 0.00027916382949748055,
"loss": 1.0821,
"step": 16800
},
{
"epoch": 0.78,
"learning_rate": 0.00027834672477189154,
"loss": 1.0761,
"step": 17400
},
{
"epoch": 0.81,
"learning_rate": 0.0002775296200463026,
"loss": 1.0546,
"step": 18000
},
{
"epoch": 0.84,
"learning_rate": 0.0002767125153207136,
"loss": 1.0796,
"step": 18600
},
{
"epoch": 0.87,
"learning_rate": 0.0002758954105951246,
"loss": 1.0725,
"step": 19200
},
{
"epoch": 0.89,
"learning_rate": 0.0002750783058695356,
"loss": 1.0716,
"step": 19800
},
{
"epoch": 0.92,
"learning_rate": 0.0002742612011439466,
"loss": 1.0463,
"step": 20400
},
{
"epoch": 0.95,
"learning_rate": 0.0002734440964183576,
"loss": 1.0412,
"step": 21000
},
{
"epoch": 0.97,
"learning_rate": 0.0002726269916927686,
"loss": 1.0318,
"step": 21600
},
{
"epoch": 1.0,
"learning_rate": 0.00027180988696717963,
"loss": 1.0419,
"step": 22200
},
{
"epoch": 1.03,
"learning_rate": 0.00027099278224159063,
"loss": 0.9673,
"step": 22800
},
{
"epoch": 1.06,
"learning_rate": 0.0002701756775160016,
"loss": 0.9693,
"step": 23400
},
{
"epoch": 1.08,
"learning_rate": 0.0002693585727904126,
"loss": 0.9555,
"step": 24000
},
{
"epoch": 1.11,
"learning_rate": 0.0002685414680648236,
"loss": 0.9832,
"step": 24600
},
{
"epoch": 1.14,
"learning_rate": 0.0002677243633392346,
"loss": 0.9578,
"step": 25200
},
{
"epoch": 1.16,
"learning_rate": 0.0002669072586136456,
"loss": 0.9569,
"step": 25800
},
{
"epoch": 1.19,
"learning_rate": 0.00026609015388805667,
"loss": 0.9521,
"step": 26400
},
{
"epoch": 1.22,
"learning_rate": 0.0002652730491624676,
"loss": 0.953,
"step": 27000
},
{
"epoch": 1.24,
"learning_rate": 0.0002644559444368786,
"loss": 0.9757,
"step": 27600
},
{
"epoch": 1.27,
"learning_rate": 0.00026363883971128966,
"loss": 0.9517,
"step": 28200
},
{
"epoch": 1.3,
"learning_rate": 0.00026282173498570066,
"loss": 0.9552,
"step": 28800
},
{
"epoch": 1.33,
"learning_rate": 0.00026200463026011166,
"loss": 0.944,
"step": 29400
},
{
"epoch": 1.35,
"learning_rate": 0.00026118752553452266,
"loss": 0.936,
"step": 30000
},
{
"epoch": 1.38,
"learning_rate": 0.00026037042080893365,
"loss": 0.9071,
"step": 30600
},
{
"epoch": 1.41,
"learning_rate": 0.00025955331608334465,
"loss": 0.9137,
"step": 31200
},
{
"epoch": 1.43,
"learning_rate": 0.00025873621135775565,
"loss": 0.9191,
"step": 31800
},
{
"epoch": 1.46,
"learning_rate": 0.00025791910663216665,
"loss": 0.9185,
"step": 32400
},
{
"epoch": 1.49,
"learning_rate": 0.0002571020019065777,
"loss": 0.9054,
"step": 33000
},
{
"epoch": 1.51,
"learning_rate": 0.0002562848971809887,
"loss": 0.9274,
"step": 33600
},
{
"epoch": 1.54,
"learning_rate": 0.0002554677924553997,
"loss": 0.8956,
"step": 34200
},
{
"epoch": 1.57,
"learning_rate": 0.0002546506877298107,
"loss": 0.893,
"step": 34800
},
{
"epoch": 1.6,
"learning_rate": 0.0002538335830042217,
"loss": 0.9151,
"step": 35400
},
{
"epoch": 1.62,
"learning_rate": 0.0002530164782786327,
"loss": 0.8903,
"step": 36000
},
{
"epoch": 1.65,
"learning_rate": 0.0002521993735530437,
"loss": 0.8929,
"step": 36600
},
{
"epoch": 1.68,
"learning_rate": 0.0002513822688274547,
"loss": 0.8886,
"step": 37200
},
{
"epoch": 1.7,
"learning_rate": 0.0002505651641018657,
"loss": 0.8827,
"step": 37800
},
{
"epoch": 1.73,
"learning_rate": 0.0002497480593762767,
"loss": 0.8877,
"step": 38400
},
{
"epoch": 1.76,
"learning_rate": 0.00024893095465068773,
"loss": 0.868,
"step": 39000
},
{
"epoch": 1.79,
"learning_rate": 0.0002481138499250987,
"loss": 0.8731,
"step": 39600
},
{
"epoch": 1.81,
"learning_rate": 0.0002472967451995097,
"loss": 0.8649,
"step": 40200
},
{
"epoch": 1.84,
"learning_rate": 0.0002464796404739207,
"loss": 0.862,
"step": 40800
},
{
"epoch": 1.87,
"learning_rate": 0.0002456625357483317,
"loss": 0.8933,
"step": 41400
},
{
"epoch": 1.89,
"learning_rate": 0.0002448454310227427,
"loss": 0.8711,
"step": 42000
},
{
"epoch": 1.92,
"learning_rate": 0.00024402832629715374,
"loss": 0.8805,
"step": 42600
},
{
"epoch": 1.95,
"learning_rate": 0.00024321122157156474,
"loss": 0.8518,
"step": 43200
},
{
"epoch": 1.97,
"learning_rate": 0.00024239411684597576,
"loss": 0.8591,
"step": 43800
},
{
"epoch": 2.0,
"learning_rate": 0.00024157701212038673,
"loss": 0.8552,
"step": 44400
},
{
"epoch": 2.03,
"learning_rate": 0.00024075990739479773,
"loss": 0.7824,
"step": 45000
},
{
"epoch": 2.06,
"learning_rate": 0.00023994280266920876,
"loss": 0.7737,
"step": 45600
},
{
"epoch": 2.08,
"learning_rate": 0.00023912569794361975,
"loss": 0.7888,
"step": 46200
},
{
"epoch": 2.11,
"learning_rate": 0.00023830859321803075,
"loss": 0.7917,
"step": 46800
},
{
"epoch": 2.14,
"learning_rate": 0.00023749148849244178,
"loss": 0.7782,
"step": 47400
},
{
"epoch": 2.16,
"learning_rate": 0.00023667438376685275,
"loss": 0.7769,
"step": 48000
},
{
"epoch": 2.19,
"learning_rate": 0.00023585727904126377,
"loss": 0.7699,
"step": 48600
},
{
"epoch": 2.22,
"learning_rate": 0.00023504017431567477,
"loss": 0.7767,
"step": 49200
},
{
"epoch": 2.25,
"learning_rate": 0.00023422306959008577,
"loss": 0.7957,
"step": 49800
},
{
"epoch": 2.27,
"learning_rate": 0.0002334059648644968,
"loss": 0.7831,
"step": 50400
},
{
"epoch": 2.3,
"learning_rate": 0.0002325888601389078,
"loss": 0.8037,
"step": 51000
},
{
"epoch": 2.33,
"learning_rate": 0.00023177175541331876,
"loss": 0.7941,
"step": 51600
},
{
"epoch": 2.35,
"learning_rate": 0.00023095465068772978,
"loss": 0.7829,
"step": 52200
},
{
"epoch": 2.38,
"learning_rate": 0.00023013754596214078,
"loss": 0.7806,
"step": 52800
},
{
"epoch": 2.41,
"learning_rate": 0.0002293204412365518,
"loss": 0.7946,
"step": 53400
},
{
"epoch": 2.43,
"learning_rate": 0.0002285033365109628,
"loss": 0.7717,
"step": 54000
},
{
"epoch": 2.46,
"learning_rate": 0.00022768623178537383,
"loss": 0.7628,
"step": 54600
},
{
"epoch": 2.49,
"learning_rate": 0.0002268691270597848,
"loss": 0.7918,
"step": 55200
},
{
"epoch": 2.52,
"learning_rate": 0.0002260520223341958,
"loss": 0.7773,
"step": 55800
},
{
"epoch": 2.54,
"learning_rate": 0.00022523491760860682,
"loss": 0.7632,
"step": 56400
},
{
"epoch": 2.57,
"learning_rate": 0.00022441781288301782,
"loss": 0.7636,
"step": 57000
},
{
"epoch": 2.6,
"learning_rate": 0.00022360070815742884,
"loss": 0.7784,
"step": 57600
},
{
"epoch": 2.62,
"learning_rate": 0.00022278360343183984,
"loss": 0.7695,
"step": 58200
},
{
"epoch": 2.65,
"learning_rate": 0.0002219664987062508,
"loss": 0.7613,
"step": 58800
},
{
"epoch": 2.68,
"learning_rate": 0.00022114939398066184,
"loss": 0.7485,
"step": 59400
},
{
"epoch": 2.71,
"learning_rate": 0.00022033228925507283,
"loss": 0.7708,
"step": 60000
},
{
"epoch": 2.73,
"learning_rate": 0.00021951518452948386,
"loss": 0.7696,
"step": 60600
},
{
"epoch": 2.76,
"learning_rate": 0.00021869807980389486,
"loss": 0.7666,
"step": 61200
},
{
"epoch": 2.79,
"learning_rate": 0.00021788097507830585,
"loss": 0.785,
"step": 61800
},
{
"epoch": 2.81,
"learning_rate": 0.00021706387035271685,
"loss": 0.7501,
"step": 62400
},
{
"epoch": 2.84,
"learning_rate": 0.00021624676562712785,
"loss": 0.7595,
"step": 63000
},
{
"epoch": 2.87,
"learning_rate": 0.00021542966090153885,
"loss": 0.7308,
"step": 63600
},
{
"epoch": 2.89,
"learning_rate": 0.00021461255617594987,
"loss": 0.7349,
"step": 64200
},
{
"epoch": 2.92,
"learning_rate": 0.00021379545145036087,
"loss": 0.7373,
"step": 64800
},
{
"epoch": 2.95,
"learning_rate": 0.0002129783467247719,
"loss": 0.7557,
"step": 65400
},
{
"epoch": 2.98,
"learning_rate": 0.00021216124199918286,
"loss": 0.7597,
"step": 66000
},
{
"epoch": 3.0,
"learning_rate": 0.00021134413727359386,
"loss": 0.7466,
"step": 66600
},
{
"epoch": 3.03,
"learning_rate": 0.00021052703254800489,
"loss": 0.6804,
"step": 67200
},
{
"epoch": 3.06,
"learning_rate": 0.00020970992782241588,
"loss": 0.6638,
"step": 67800
},
{
"epoch": 3.08,
"learning_rate": 0.0002088928230968269,
"loss": 0.6752,
"step": 68400
},
{
"epoch": 3.11,
"learning_rate": 0.0002080757183712379,
"loss": 0.6732,
"step": 69000
},
{
"epoch": 3.14,
"learning_rate": 0.00020725861364564888,
"loss": 0.6693,
"step": 69600
},
{
"epoch": 3.17,
"learning_rate": 0.0002064415089200599,
"loss": 0.6765,
"step": 70200
},
{
"epoch": 3.19,
"learning_rate": 0.0002056244041944709,
"loss": 0.6703,
"step": 70800
},
{
"epoch": 3.22,
"learning_rate": 0.00020480729946888192,
"loss": 0.6746,
"step": 71400
},
{
"epoch": 3.25,
"learning_rate": 0.00020399019474329292,
"loss": 0.6885,
"step": 72000
},
{
"epoch": 3.27,
"learning_rate": 0.00020317309001770394,
"loss": 0.7011,
"step": 72600
},
{
"epoch": 3.3,
"learning_rate": 0.00020235598529211492,
"loss": 0.6752,
"step": 73200
},
{
"epoch": 3.33,
"learning_rate": 0.0002015388805665259,
"loss": 0.6875,
"step": 73800
},
{
"epoch": 3.35,
"learning_rate": 0.00020072177584093694,
"loss": 0.6809,
"step": 74400
},
{
"epoch": 3.38,
"learning_rate": 0.00019990467111534793,
"loss": 0.6775,
"step": 75000
},
{
"epoch": 3.41,
"learning_rate": 0.00019908756638975893,
"loss": 0.692,
"step": 75600
},
{
"epoch": 3.44,
"learning_rate": 0.00019827046166416996,
"loss": 0.68,
"step": 76200
},
{
"epoch": 3.46,
"learning_rate": 0.00019745335693858093,
"loss": 0.675,
"step": 76800
},
{
"epoch": 3.49,
"learning_rate": 0.00019663625221299195,
"loss": 0.6812,
"step": 77400
},
{
"epoch": 3.52,
"learning_rate": 0.00019581914748740295,
"loss": 0.6699,
"step": 78000
},
{
"epoch": 3.54,
"learning_rate": 0.00019500204276181395,
"loss": 0.6684,
"step": 78600
},
{
"epoch": 3.57,
"learning_rate": 0.00019418493803622497,
"loss": 0.675,
"step": 79200
},
{
"epoch": 3.6,
"learning_rate": 0.00019336783331063597,
"loss": 0.6479,
"step": 79800
},
{
"epoch": 3.63,
"learning_rate": 0.00019255072858504697,
"loss": 0.6679,
"step": 80400
},
{
"epoch": 3.65,
"learning_rate": 0.00019173362385945796,
"loss": 0.6831,
"step": 81000
},
{
"epoch": 3.68,
"learning_rate": 0.00019091651913386896,
"loss": 0.6633,
"step": 81600
},
{
"epoch": 3.71,
"learning_rate": 0.00019009941440828,
"loss": 0.6809,
"step": 82200
},
{
"epoch": 3.73,
"learning_rate": 0.00018928230968269098,
"loss": 0.6579,
"step": 82800
},
{
"epoch": 3.76,
"learning_rate": 0.000188465204957102,
"loss": 0.6539,
"step": 83400
},
{
"epoch": 3.79,
"learning_rate": 0.00018764810023151298,
"loss": 0.6607,
"step": 84000
},
{
"epoch": 3.81,
"learning_rate": 0.00018683099550592398,
"loss": 0.6615,
"step": 84600
},
{
"epoch": 3.84,
"learning_rate": 0.000186013890780335,
"loss": 0.6614,
"step": 85200
},
{
"epoch": 3.87,
"learning_rate": 0.000185196786054746,
"loss": 0.6517,
"step": 85800
},
{
"epoch": 3.9,
"learning_rate": 0.00018437968132915702,
"loss": 0.6559,
"step": 86400
},
{
"epoch": 3.92,
"learning_rate": 0.00018356257660356802,
"loss": 0.6506,
"step": 87000
},
{
"epoch": 3.95,
"learning_rate": 0.000182745471877979,
"loss": 0.6541,
"step": 87600
},
{
"epoch": 3.98,
"learning_rate": 0.00018192836715239002,
"loss": 0.6593,
"step": 88200
},
{
"epoch": 4.0,
"learning_rate": 0.00018111126242680101,
"loss": 0.6335,
"step": 88800
},
{
"epoch": 4.03,
"learning_rate": 0.00018029415770121204,
"loss": 0.5884,
"step": 89400
},
{
"epoch": 4.06,
"learning_rate": 0.00017947705297562304,
"loss": 0.5834,
"step": 90000
},
{
"epoch": 4.08,
"learning_rate": 0.00017865994825003403,
"loss": 0.596,
"step": 90600
},
{
"epoch": 4.11,
"learning_rate": 0.00017784284352444503,
"loss": 0.5839,
"step": 91200
},
{
"epoch": 4.14,
"learning_rate": 0.00017702573879885603,
"loss": 0.5738,
"step": 91800
},
{
"epoch": 4.17,
"learning_rate": 0.00017620863407326705,
"loss": 0.5835,
"step": 92400
},
{
"epoch": 4.19,
"learning_rate": 0.00017539152934767805,
"loss": 0.5738,
"step": 93000
},
{
"epoch": 4.22,
"learning_rate": 0.00017457442462208905,
"loss": 0.5773,
"step": 93600
},
{
"epoch": 4.25,
"learning_rate": 0.00017375731989650007,
"loss": 0.5866,
"step": 94200
},
{
"epoch": 4.27,
"learning_rate": 0.00017294021517091104,
"loss": 0.5843,
"step": 94800
},
{
"epoch": 4.3,
"learning_rate": 0.00017212311044532204,
"loss": 0.603,
"step": 95400
},
{
"epoch": 4.33,
"learning_rate": 0.00017130600571973307,
"loss": 0.5819,
"step": 96000
},
{
"epoch": 4.36,
"learning_rate": 0.00017048890099414406,
"loss": 0.592,
"step": 96600
},
{
"epoch": 4.38,
"learning_rate": 0.0001696717962685551,
"loss": 0.58,
"step": 97200
},
{
"epoch": 4.41,
"learning_rate": 0.0001688546915429661,
"loss": 0.5882,
"step": 97800
},
{
"epoch": 4.44,
"learning_rate": 0.00016803758681737706,
"loss": 0.5987,
"step": 98400
},
{
"epoch": 4.46,
"learning_rate": 0.00016722048209178808,
"loss": 0.585,
"step": 99000
},
{
"epoch": 4.49,
"learning_rate": 0.00016640337736619908,
"loss": 0.5769,
"step": 99600
},
{
"epoch": 4.52,
"learning_rate": 0.0001655862726406101,
"loss": 0.5813,
"step": 100200
},
{
"epoch": 4.54,
"learning_rate": 0.0001647691679150211,
"loss": 0.6053,
"step": 100800
},
{
"epoch": 4.57,
"learning_rate": 0.00016395206318943207,
"loss": 0.5889,
"step": 101400
},
{
"epoch": 4.6,
"learning_rate": 0.0001631349584638431,
"loss": 0.5877,
"step": 102000
},
{
"epoch": 4.63,
"learning_rate": 0.0001623178537382541,
"loss": 0.581,
"step": 102600
},
{
"epoch": 4.65,
"learning_rate": 0.00016150074901266512,
"loss": 0.5699,
"step": 103200
},
{
"epoch": 4.68,
"learning_rate": 0.00016068364428707612,
"loss": 0.5781,
"step": 103800
},
{
"epoch": 4.71,
"learning_rate": 0.00015986653956148714,
"loss": 0.5812,
"step": 104400
},
{
"epoch": 4.73,
"learning_rate": 0.0001590494348358981,
"loss": 0.5686,
"step": 105000
},
{
"epoch": 4.76,
"learning_rate": 0.0001582323301103091,
"loss": 0.5724,
"step": 105600
},
{
"epoch": 4.79,
"learning_rate": 0.00015741522538472013,
"loss": 0.5722,
"step": 106200
},
{
"epoch": 4.82,
"learning_rate": 0.00015659812065913113,
"loss": 0.5834,
"step": 106800
},
{
"epoch": 4.84,
"learning_rate": 0.00015578101593354213,
"loss": 0.5825,
"step": 107400
},
{
"epoch": 4.87,
"learning_rate": 0.00015496391120795315,
"loss": 0.5783,
"step": 108000
},
{
"epoch": 4.9,
"learning_rate": 0.00015414680648236412,
"loss": 0.5819,
"step": 108600
},
{
"epoch": 4.92,
"learning_rate": 0.00015332970175677515,
"loss": 0.5823,
"step": 109200
},
{
"epoch": 4.95,
"learning_rate": 0.00015251259703118615,
"loss": 0.5755,
"step": 109800
},
{
"epoch": 4.98,
"learning_rate": 0.00015169549230559714,
"loss": 0.571,
"step": 110400
},
{
"epoch": 5.0,
"learning_rate": 0.00015087838758000817,
"loss": 0.5603,
"step": 111000
},
{
"epoch": 5.03,
"learning_rate": 0.00015006128285441917,
"loss": 0.5127,
"step": 111600
},
{
"epoch": 5.06,
"learning_rate": 0.00014924417812883016,
"loss": 0.52,
"step": 112200
},
{
"epoch": 5.09,
"learning_rate": 0.0001484270734032412,
"loss": 0.5327,
"step": 112800
},
{
"epoch": 5.11,
"learning_rate": 0.00014760996867765216,
"loss": 0.5081,
"step": 113400
},
{
"epoch": 5.14,
"learning_rate": 0.00014679286395206318,
"loss": 0.5189,
"step": 114000
},
{
"epoch": 5.17,
"learning_rate": 0.00014597575922647418,
"loss": 0.5178,
"step": 114600
},
{
"epoch": 5.19,
"learning_rate": 0.00014515865450088518,
"loss": 0.5254,
"step": 115200
},
{
"epoch": 5.22,
"learning_rate": 0.00014434154977529618,
"loss": 0.5171,
"step": 115800
},
{
"epoch": 5.25,
"learning_rate": 0.0001435244450497072,
"loss": 0.5167,
"step": 116400
},
{
"epoch": 5.28,
"learning_rate": 0.0001427073403241182,
"loss": 0.5286,
"step": 117000
},
{
"epoch": 5.3,
"learning_rate": 0.0001418902355985292,
"loss": 0.5298,
"step": 117600
},
{
"epoch": 5.33,
"learning_rate": 0.00014107313087294022,
"loss": 0.5295,
"step": 118200
},
{
"epoch": 5.36,
"learning_rate": 0.0001402560261473512,
"loss": 0.5324,
"step": 118800
},
{
"epoch": 5.38,
"learning_rate": 0.00013943892142176222,
"loss": 0.5155,
"step": 119400
},
{
"epoch": 5.41,
"learning_rate": 0.0001386218166961732,
"loss": 0.5138,
"step": 120000
},
{
"epoch": 5.44,
"learning_rate": 0.0001378047119705842,
"loss": 0.5215,
"step": 120600
},
{
"epoch": 5.46,
"learning_rate": 0.00013698760724499524,
"loss": 0.5236,
"step": 121200
},
{
"epoch": 5.49,
"learning_rate": 0.00013617050251940623,
"loss": 0.5249,
"step": 121800
},
{
"epoch": 5.52,
"learning_rate": 0.00013535339779381723,
"loss": 0.5086,
"step": 122400
},
{
"epoch": 5.55,
"learning_rate": 0.00013453629306822823,
"loss": 0.5271,
"step": 123000
},
{
"epoch": 5.57,
"learning_rate": 0.00013371918834263925,
"loss": 0.5214,
"step": 123600
},
{
"epoch": 5.6,
"learning_rate": 0.00013290208361705022,
"loss": 0.5257,
"step": 124200
},
{
"epoch": 5.63,
"learning_rate": 0.00013208497889146125,
"loss": 0.5182,
"step": 124800
},
{
"epoch": 5.65,
"learning_rate": 0.00013126787416587225,
"loss": 0.5128,
"step": 125400
},
{
"epoch": 5.68,
"learning_rate": 0.00013045076944028324,
"loss": 0.5164,
"step": 126000
},
{
"epoch": 5.71,
"learning_rate": 0.00012963366471469427,
"loss": 0.5142,
"step": 126600
},
{
"epoch": 5.74,
"learning_rate": 0.00012881655998910527,
"loss": 0.5336,
"step": 127200
},
{
"epoch": 5.76,
"learning_rate": 0.00012799945526351626,
"loss": 0.5216,
"step": 127800
},
{
"epoch": 5.79,
"learning_rate": 0.00012718235053792726,
"loss": 0.5185,
"step": 128400
},
{
"epoch": 5.82,
"learning_rate": 0.00012636524581233829,
"loss": 0.5134,
"step": 129000
},
{
"epoch": 5.84,
"learning_rate": 0.00012554814108674928,
"loss": 0.5206,
"step": 129600
},
{
"epoch": 5.87,
"learning_rate": 0.00012473103636116028,
"loss": 0.5056,
"step": 130200
},
{
"epoch": 5.9,
"learning_rate": 0.00012391393163557128,
"loss": 0.4996,
"step": 130800
},
{
"epoch": 5.92,
"learning_rate": 0.00012309682690998228,
"loss": 0.51,
"step": 131400
},
{
"epoch": 5.95,
"learning_rate": 0.0001222797221843933,
"loss": 0.499,
"step": 132000
},
{
"epoch": 5.98,
"learning_rate": 0.0001214626174588043,
"loss": 0.5181,
"step": 132600
},
{
"epoch": 6.01,
"learning_rate": 0.0001206455127332153,
"loss": 0.5164,
"step": 133200
},
{
"epoch": 6.03,
"learning_rate": 0.0001198284080076263,
"loss": 0.4706,
"step": 133800
},
{
"epoch": 6.06,
"learning_rate": 0.00011901130328203729,
"loss": 0.4552,
"step": 134400
},
{
"epoch": 6.09,
"learning_rate": 0.0001181941985564483,
"loss": 0.457,
"step": 135000
},
{
"epoch": 6.11,
"learning_rate": 0.00011737709383085931,
"loss": 0.4606,
"step": 135600
},
{
"epoch": 6.14,
"learning_rate": 0.00011655998910527031,
"loss": 0.4685,
"step": 136200
},
{
"epoch": 6.17,
"learning_rate": 0.00011574288437968132,
"loss": 0.4564,
"step": 136800
},
{
"epoch": 6.2,
"learning_rate": 0.00011492577965409233,
"loss": 0.4611,
"step": 137400
},
{
"epoch": 6.22,
"learning_rate": 0.00011410867492850332,
"loss": 0.4496,
"step": 138000
},
{
"epoch": 6.25,
"learning_rate": 0.00011329157020291433,
"loss": 0.4509,
"step": 138600
},
{
"epoch": 6.28,
"learning_rate": 0.00011247446547732534,
"loss": 0.4546,
"step": 139200
},
{
"epoch": 6.3,
"learning_rate": 0.00011165736075173634,
"loss": 0.4616,
"step": 139800
},
{
"epoch": 6.33,
"learning_rate": 0.00011084025602614733,
"loss": 0.465,
"step": 140400
},
{
"epoch": 6.36,
"learning_rate": 0.00011002315130055835,
"loss": 0.4639,
"step": 141000
},
{
"epoch": 6.38,
"learning_rate": 0.00010920604657496934,
"loss": 0.4605,
"step": 141600
},
{
"epoch": 6.41,
"learning_rate": 0.00010838894184938035,
"loss": 0.4592,
"step": 142200
},
{
"epoch": 6.44,
"learning_rate": 0.00010757183712379136,
"loss": 0.4612,
"step": 142800
},
{
"epoch": 6.47,
"learning_rate": 0.00010675473239820235,
"loss": 0.4538,
"step": 143400
},
{
"epoch": 6.49,
"learning_rate": 0.00010593762767261336,
"loss": 0.452,
"step": 144000
},
{
"epoch": 6.52,
"learning_rate": 0.00010512052294702437,
"loss": 0.4701,
"step": 144600
},
{
"epoch": 6.55,
"learning_rate": 0.00010430341822143537,
"loss": 0.4518,
"step": 145200
},
{
"epoch": 6.57,
"learning_rate": 0.00010348631349584638,
"loss": 0.4594,
"step": 145800
},
{
"epoch": 6.6,
"learning_rate": 0.00010266920877025738,
"loss": 0.4593,
"step": 146400
},
{
"epoch": 6.63,
"learning_rate": 0.00010185210404466838,
"loss": 0.4651,
"step": 147000
},
{
"epoch": 6.65,
"learning_rate": 0.00010103499931907939,
"loss": 0.4547,
"step": 147600
},
{
"epoch": 6.68,
"learning_rate": 0.0001002178945934904,
"loss": 0.4544,
"step": 148200
},
{
"epoch": 6.71,
"learning_rate": 9.940078986790138e-05,
"loss": 0.4605,
"step": 148800
},
{
"epoch": 6.74,
"learning_rate": 9.858368514231239e-05,
"loss": 0.4518,
"step": 149400
},
{
"epoch": 6.76,
"learning_rate": 9.77665804167234e-05,
"loss": 0.4625,
"step": 150000
},
{
"epoch": 6.79,
"learning_rate": 9.69494756911344e-05,
"loss": 0.4537,
"step": 150600
},
{
"epoch": 6.82,
"learning_rate": 9.613237096554541e-05,
"loss": 0.4515,
"step": 151200
},
{
"epoch": 6.84,
"learning_rate": 9.531526623995642e-05,
"loss": 0.4507,
"step": 151800
},
{
"epoch": 6.87,
"learning_rate": 9.449816151436741e-05,
"loss": 0.4617,
"step": 152400
},
{
"epoch": 6.9,
"learning_rate": 9.368105678877842e-05,
"loss": 0.4494,
"step": 153000
},
{
"epoch": 6.93,
"learning_rate": 9.286395206318943e-05,
"loss": 0.4502,
"step": 153600
},
{
"epoch": 6.95,
"learning_rate": 9.204684733760043e-05,
"loss": 0.4495,
"step": 154200
},
{
"epoch": 6.98,
"learning_rate": 9.122974261201142e-05,
"loss": 0.4569,
"step": 154800
},
{
"epoch": 7.01,
"learning_rate": 9.041263788642244e-05,
"loss": 0.4416,
"step": 155400
},
{
"epoch": 7.03,
"learning_rate": 8.959553316083343e-05,
"loss": 0.4081,
"step": 156000
},
{
"epoch": 7.06,
"learning_rate": 8.877842843524444e-05,
"loss": 0.4196,
"step": 156600
},
{
"epoch": 7.09,
"learning_rate": 8.796132370965546e-05,
"loss": 0.4135,
"step": 157200
},
{
"epoch": 7.11,
"learning_rate": 8.714421898406644e-05,
"loss": 0.4088,
"step": 157800
},
{
"epoch": 7.14,
"learning_rate": 8.632711425847745e-05,
"loss": 0.4005,
"step": 158400
},
{
"epoch": 7.17,
"learning_rate": 8.551000953288846e-05,
"loss": 0.3954,
"step": 159000
},
{
"epoch": 7.2,
"learning_rate": 8.469290480729946e-05,
"loss": 0.4093,
"step": 159600
},
{
"epoch": 7.22,
"learning_rate": 8.387580008171047e-05,
"loss": 0.3998,
"step": 160200
},
{
"epoch": 7.25,
"learning_rate": 8.305869535612147e-05,
"loss": 0.4068,
"step": 160800
},
{
"epoch": 7.28,
"learning_rate": 8.224159063053247e-05,
"loss": 0.3933,
"step": 161400
},
{
"epoch": 7.3,
"learning_rate": 8.142448590494348e-05,
"loss": 0.3957,
"step": 162000
},
{
"epoch": 7.33,
"learning_rate": 8.060738117935449e-05,
"loss": 0.3954,
"step": 162600
},
{
"epoch": 7.36,
"learning_rate": 7.979027645376547e-05,
"loss": 0.3949,
"step": 163200
},
{
"epoch": 7.39,
"learning_rate": 7.897317172817648e-05,
"loss": 0.4014,
"step": 163800
},
{
"epoch": 7.41,
"learning_rate": 7.81560670025875e-05,
"loss": 0.4007,
"step": 164400
},
{
"epoch": 7.44,
"learning_rate": 7.733896227699849e-05,
"loss": 0.4037,
"step": 165000
},
{
"epoch": 7.47,
"learning_rate": 7.65218575514095e-05,
"loss": 0.4055,
"step": 165600
},
{
"epoch": 7.49,
"learning_rate": 7.570475282582051e-05,
"loss": 0.3985,
"step": 166200
},
{
"epoch": 7.52,
"learning_rate": 7.488764810023151e-05,
"loss": 0.4022,
"step": 166800
},
{
"epoch": 7.55,
"learning_rate": 7.407054337464251e-05,
"loss": 0.3872,
"step": 167400
},
{
"epoch": 7.57,
"learning_rate": 7.325343864905351e-05,
"loss": 0.3895,
"step": 168000
},
{
"epoch": 7.6,
"learning_rate": 7.243633392346452e-05,
"loss": 0.3968,
"step": 168600
},
{
"epoch": 7.63,
"learning_rate": 7.161922919787552e-05,
"loss": 0.4051,
"step": 169200
},
{
"epoch": 7.66,
"learning_rate": 7.080212447228653e-05,
"loss": 0.3915,
"step": 169800
},
{
"epoch": 7.68,
"learning_rate": 6.998501974669754e-05,
"loss": 0.3934,
"step": 170400
},
{
"epoch": 7.71,
"learning_rate": 6.916791502110854e-05,
"loss": 0.3943,
"step": 171000
},
{
"epoch": 7.74,
"learning_rate": 6.835081029551953e-05,
"loss": 0.3932,
"step": 171600
},
{
"epoch": 7.76,
"learning_rate": 6.753370556993054e-05,
"loss": 0.4063,
"step": 172200
},
{
"epoch": 7.79,
"learning_rate": 6.671660084434154e-05,
"loss": 0.3975,
"step": 172800
},
{
"epoch": 7.82,
"learning_rate": 6.589949611875254e-05,
"loss": 0.3915,
"step": 173400
},
{
"epoch": 7.85,
"learning_rate": 6.508239139316355e-05,
"loss": 0.3892,
"step": 174000
},
{
"epoch": 7.87,
"learning_rate": 6.426528666757456e-05,
"loss": 0.3831,
"step": 174600
},
{
"epoch": 7.9,
"learning_rate": 6.344818194198556e-05,
"loss": 0.3896,
"step": 175200
},
{
"epoch": 7.93,
"learning_rate": 6.263107721639657e-05,
"loss": 0.3839,
"step": 175800
},
{
"epoch": 7.95,
"learning_rate": 6.181397249080757e-05,
"loss": 0.401,
"step": 176400
},
{
"epoch": 7.98,
"learning_rate": 6.0996867765218565e-05,
"loss": 0.3888,
"step": 177000
},
{
"epoch": 8.01,
"learning_rate": 6.0179763039629576e-05,
"loss": 0.371,
"step": 177600
},
{
"epoch": 8.03,
"learning_rate": 5.936265831404058e-05,
"loss": 0.3514,
"step": 178200
},
{
"epoch": 8.06,
"learning_rate": 5.854555358845158e-05,
"loss": 0.364,
"step": 178800
},
{
"epoch": 8.09,
"learning_rate": 5.772844886286259e-05,
"loss": 0.3486,
"step": 179400
},
{
"epoch": 8.12,
"learning_rate": 5.691134413727359e-05,
"loss": 0.3531,
"step": 180000
},
{
"epoch": 8.14,
"learning_rate": 5.609423941168459e-05,
"loss": 0.3584,
"step": 180600
},
{
"epoch": 8.17,
"learning_rate": 5.52771346860956e-05,
"loss": 0.345,
"step": 181200
},
{
"epoch": 8.2,
"learning_rate": 5.44600299605066e-05,
"loss": 0.3406,
"step": 181800
},
{
"epoch": 8.22,
"learning_rate": 5.3642925234917604e-05,
"loss": 0.3519,
"step": 182400
},
{
"epoch": 8.25,
"learning_rate": 5.28258205093286e-05,
"loss": 0.3607,
"step": 183000
},
{
"epoch": 8.28,
"learning_rate": 5.200871578373961e-05,
"loss": 0.3533,
"step": 183600
},
{
"epoch": 8.31,
"learning_rate": 5.119161105815061e-05,
"loss": 0.3586,
"step": 184200
},
{
"epoch": 8.33,
"learning_rate": 5.0374506332561615e-05,
"loss": 0.3453,
"step": 184800
},
{
"epoch": 8.36,
"learning_rate": 4.9557401606972626e-05,
"loss": 0.3431,
"step": 185400
},
{
"epoch": 8.39,
"learning_rate": 4.8740296881383624e-05,
"loss": 0.3546,
"step": 186000
},
{
"epoch": 8.41,
"learning_rate": 4.792319215579463e-05,
"loss": 0.3434,
"step": 186600
},
{
"epoch": 8.44,
"learning_rate": 4.710608743020563e-05,
"loss": 0.356,
"step": 187200
},
{
"epoch": 8.47,
"learning_rate": 4.628898270461664e-05,
"loss": 0.343,
"step": 187800
},
{
"epoch": 8.49,
"learning_rate": 4.547187797902764e-05,
"loss": 0.3501,
"step": 188400
},
{
"epoch": 8.52,
"learning_rate": 4.4654773253438645e-05,
"loss": 0.3384,
"step": 189000
},
{
"epoch": 8.55,
"learning_rate": 4.383766852784965e-05,
"loss": 0.3515,
"step": 189600
},
{
"epoch": 8.58,
"learning_rate": 4.302056380226065e-05,
"loss": 0.353,
"step": 190200
},
{
"epoch": 8.6,
"learning_rate": 4.220345907667166e-05,
"loss": 0.3448,
"step": 190800
},
{
"epoch": 8.63,
"learning_rate": 4.138635435108266e-05,
"loss": 0.3438,
"step": 191400
},
{
"epoch": 8.66,
"learning_rate": 4.056924962549366e-05,
"loss": 0.3539,
"step": 192000
},
{
"epoch": 8.68,
"learning_rate": 3.975214489990467e-05,
"loss": 0.3514,
"step": 192600
},
{
"epoch": 8.71,
"learning_rate": 3.893504017431567e-05,
"loss": 0.3514,
"step": 193200
},
{
"epoch": 8.74,
"learning_rate": 3.811793544872667e-05,
"loss": 0.3349,
"step": 193800
},
{
"epoch": 8.77,
"learning_rate": 3.730083072313768e-05,
"loss": 0.3429,
"step": 194400
},
{
"epoch": 8.79,
"learning_rate": 3.648372599754868e-05,
"loss": 0.3407,
"step": 195000
},
{
"epoch": 8.82,
"learning_rate": 3.5666621271959686e-05,
"loss": 0.3444,
"step": 195600
},
{
"epoch": 8.85,
"learning_rate": 3.484951654637069e-05,
"loss": 0.3434,
"step": 196200
},
{
"epoch": 8.87,
"learning_rate": 3.4032411820781695e-05,
"loss": 0.3423,
"step": 196800
},
{
"epoch": 8.9,
"learning_rate": 3.32153070951927e-05,
"loss": 0.3387,
"step": 197400
},
{
"epoch": 8.93,
"learning_rate": 3.23982023696037e-05,
"loss": 0.3415,
"step": 198000
},
{
"epoch": 8.95,
"learning_rate": 3.158109764401471e-05,
"loss": 0.3404,
"step": 198600
},
{
"epoch": 8.98,
"learning_rate": 3.076399291842571e-05,
"loss": 0.3303,
"step": 199200
}
],
"logging_steps": 600,
"max_steps": 221790,
"num_train_epochs": 10,
"save_steps": 500,
"total_flos": 2.002554029520894e+20,
"trial_name": null,
"trial_params": null
}