chefberto-italian-cased / trainer_state.json
denocris's picture
First model version
bfa175b
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 14.942528735632184,
"global_step": 46800,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.031928480204342274,
"learning_rate": 1.995742869306088e-05,
"loss": 1.0424302673339845,
"step": 100
},
{
"epoch": 0.06385696040868455,
"learning_rate": 1.9914857386121755e-05,
"loss": 0.5240679550170898,
"step": 200
},
{
"epoch": 0.09578544061302682,
"learning_rate": 1.9872286079182633e-05,
"loss": 0.5014432144165039,
"step": 300
},
{
"epoch": 0.1277139208173691,
"learning_rate": 1.9829714772243508e-05,
"loss": 0.48912925720214845,
"step": 400
},
{
"epoch": 0.15964240102171137,
"learning_rate": 1.9787143465304387e-05,
"loss": 0.48597183227539065,
"step": 500
},
{
"epoch": 0.19157088122605365,
"learning_rate": 1.974457215836526e-05,
"loss": 0.4988706970214844,
"step": 600
},
{
"epoch": 0.22349936143039592,
"learning_rate": 1.970200085142614e-05,
"loss": 0.48521129608154295,
"step": 700
},
{
"epoch": 0.2554278416347382,
"learning_rate": 1.9659429544487015e-05,
"loss": 0.4737409210205078,
"step": 800
},
{
"epoch": 0.28735632183908044,
"learning_rate": 1.9616858237547893e-05,
"loss": 0.478267822265625,
"step": 900
},
{
"epoch": 0.31928480204342274,
"learning_rate": 1.957428693060877e-05,
"loss": 0.46842235565185547,
"step": 1000
},
{
"epoch": 0.351213282247765,
"learning_rate": 1.953171562366965e-05,
"loss": 0.47340740203857423,
"step": 1100
},
{
"epoch": 0.3831417624521073,
"learning_rate": 1.9489144316730525e-05,
"loss": 0.48022716522216796,
"step": 1200
},
{
"epoch": 0.41507024265644954,
"learning_rate": 1.9446573009791403e-05,
"loss": 0.4702701950073242,
"step": 1300
},
{
"epoch": 0.44699872286079184,
"learning_rate": 1.9404001702852278e-05,
"loss": 0.47483158111572266,
"step": 1400
},
{
"epoch": 0.4789272030651341,
"learning_rate": 1.9361430395913157e-05,
"loss": 0.45973930358886717,
"step": 1500
},
{
"epoch": 0.5108556832694764,
"learning_rate": 1.9318859088974035e-05,
"loss": 0.43192115783691404,
"step": 1600
},
{
"epoch": 0.5427841634738186,
"learning_rate": 1.927628778203491e-05,
"loss": 0.44062965393066406,
"step": 1700
},
{
"epoch": 0.5747126436781609,
"learning_rate": 1.923371647509579e-05,
"loss": 0.460767822265625,
"step": 1800
},
{
"epoch": 0.6066411238825032,
"learning_rate": 1.9191145168156663e-05,
"loss": 0.4533885192871094,
"step": 1900
},
{
"epoch": 0.6385696040868455,
"learning_rate": 1.914857386121754e-05,
"loss": 0.4435346221923828,
"step": 2000
},
{
"epoch": 0.6704980842911877,
"learning_rate": 1.9106002554278417e-05,
"loss": 0.4584750747680664,
"step": 2100
},
{
"epoch": 0.70242656449553,
"learning_rate": 1.9063431247339295e-05,
"loss": 0.43961280822753906,
"step": 2200
},
{
"epoch": 0.7343550446998723,
"learning_rate": 1.902085994040017e-05,
"loss": 0.43362281799316404,
"step": 2300
},
{
"epoch": 0.7662835249042146,
"learning_rate": 1.8978288633461048e-05,
"loss": 0.4443118286132812,
"step": 2400
},
{
"epoch": 0.7982120051085568,
"learning_rate": 1.8935717326521927e-05,
"loss": 0.4438581848144531,
"step": 2500
},
{
"epoch": 0.8301404853128991,
"learning_rate": 1.88931460195828e-05,
"loss": 0.4310215759277344,
"step": 2600
},
{
"epoch": 0.8620689655172413,
"learning_rate": 1.885057471264368e-05,
"loss": 0.4399332809448242,
"step": 2700
},
{
"epoch": 0.8939974457215837,
"learning_rate": 1.8808003405704555e-05,
"loss": 0.4511014175415039,
"step": 2800
},
{
"epoch": 0.9259259259259259,
"learning_rate": 1.8765432098765433e-05,
"loss": 0.44327774047851565,
"step": 2900
},
{
"epoch": 0.9578544061302682,
"learning_rate": 1.872286079182631e-05,
"loss": 0.43171138763427735,
"step": 3000
},
{
"epoch": 0.9897828863346104,
"learning_rate": 1.868028948488719e-05,
"loss": 0.4460626983642578,
"step": 3100
},
{
"epoch": 1.0,
"eval_loss": 0.3910863995552063,
"step": 3132
},
{
"epoch": 1.0217113665389528,
"learning_rate": 1.8637718177948065e-05,
"loss": 0.43170944213867185,
"step": 3200
},
{
"epoch": 1.053639846743295,
"learning_rate": 1.8595146871008943e-05,
"loss": 0.42756095886230466,
"step": 3300
},
{
"epoch": 1.0855683269476373,
"learning_rate": 1.8552575564069818e-05,
"loss": 0.423099479675293,
"step": 3400
},
{
"epoch": 1.1174968071519795,
"learning_rate": 1.8510004257130697e-05,
"loss": 0.42962390899658204,
"step": 3500
},
{
"epoch": 1.1494252873563218,
"learning_rate": 1.846743295019157e-05,
"loss": 0.4373976898193359,
"step": 3600
},
{
"epoch": 1.181353767560664,
"learning_rate": 1.842486164325245e-05,
"loss": 0.4381977462768555,
"step": 3700
},
{
"epoch": 1.2132822477650063,
"learning_rate": 1.8382290336313325e-05,
"loss": 0.4158343887329102,
"step": 3800
},
{
"epoch": 1.2452107279693487,
"learning_rate": 1.8339719029374203e-05,
"loss": 0.4281564712524414,
"step": 3900
},
{
"epoch": 1.277139208173691,
"learning_rate": 1.829714772243508e-05,
"loss": 0.41806602478027344,
"step": 4000
},
{
"epoch": 1.3090676883780332,
"learning_rate": 1.8254576415495957e-05,
"loss": 0.4173674011230469,
"step": 4100
},
{
"epoch": 1.3409961685823755,
"learning_rate": 1.8212005108556835e-05,
"loss": 0.43225109100341796,
"step": 4200
},
{
"epoch": 1.3729246487867177,
"learning_rate": 1.816943380161771e-05,
"loss": 0.421945686340332,
"step": 4300
},
{
"epoch": 1.40485312899106,
"learning_rate": 1.812686249467859e-05,
"loss": 0.42005214691162107,
"step": 4400
},
{
"epoch": 1.4367816091954024,
"learning_rate": 1.8084291187739463e-05,
"loss": 0.40552581787109376,
"step": 4500
},
{
"epoch": 1.4687100893997447,
"learning_rate": 1.804171988080034e-05,
"loss": 0.41495433807373044,
"step": 4600
},
{
"epoch": 1.500638569604087,
"learning_rate": 1.7999148573861217e-05,
"loss": 0.4268427658081055,
"step": 4700
},
{
"epoch": 1.5325670498084292,
"learning_rate": 1.7956577266922095e-05,
"loss": 0.4178670120239258,
"step": 4800
},
{
"epoch": 1.5644955300127714,
"learning_rate": 1.7914005959982973e-05,
"loss": 0.4088083267211914,
"step": 4900
},
{
"epoch": 1.5964240102171137,
"learning_rate": 1.787143465304385e-05,
"loss": 0.4090264129638672,
"step": 5000
},
{
"epoch": 1.628352490421456,
"learning_rate": 1.7828863346104727e-05,
"loss": 0.39727077484130857,
"step": 5100
},
{
"epoch": 1.6602809706257982,
"learning_rate": 1.7786292039165605e-05,
"loss": 0.40877700805664063,
"step": 5200
},
{
"epoch": 1.6922094508301404,
"learning_rate": 1.774372073222648e-05,
"loss": 0.4055898666381836,
"step": 5300
},
{
"epoch": 1.7241379310344827,
"learning_rate": 1.770114942528736e-05,
"loss": 0.40825370788574217,
"step": 5400
},
{
"epoch": 1.756066411238825,
"learning_rate": 1.7658578118348237e-05,
"loss": 0.39790542602539064,
"step": 5500
},
{
"epoch": 1.7879948914431671,
"learning_rate": 1.761600681140911e-05,
"loss": 0.4129365158081055,
"step": 5600
},
{
"epoch": 1.8199233716475096,
"learning_rate": 1.757343550446999e-05,
"loss": 0.4137036895751953,
"step": 5700
},
{
"epoch": 1.8518518518518519,
"learning_rate": 1.7530864197530865e-05,
"loss": 0.41169502258300783,
"step": 5800
},
{
"epoch": 1.883780332056194,
"learning_rate": 1.7488292890591743e-05,
"loss": 0.3932318115234375,
"step": 5900
},
{
"epoch": 1.9157088122605364,
"learning_rate": 1.7445721583652618e-05,
"loss": 0.40273929595947267,
"step": 6000
},
{
"epoch": 1.9476372924648788,
"learning_rate": 1.7403150276713497e-05,
"loss": 0.4069852066040039,
"step": 6100
},
{
"epoch": 1.979565772669221,
"learning_rate": 1.736057896977437e-05,
"loss": 0.40934764862060546,
"step": 6200
},
{
"epoch": 2.0,
"eval_loss": 0.3667986989021301,
"step": 6264
},
{
"epoch": 2.0114942528735633,
"learning_rate": 1.731800766283525e-05,
"loss": 0.40322193145751956,
"step": 6300
},
{
"epoch": 2.0434227330779056,
"learning_rate": 1.727543635589613e-05,
"loss": 0.3913343048095703,
"step": 6400
},
{
"epoch": 2.075351213282248,
"learning_rate": 1.7232865048957003e-05,
"loss": 0.4131240081787109,
"step": 6500
},
{
"epoch": 2.10727969348659,
"learning_rate": 1.719029374201788e-05,
"loss": 0.394369010925293,
"step": 6600
},
{
"epoch": 2.1392081736909323,
"learning_rate": 1.7147722435078757e-05,
"loss": 0.3991780471801758,
"step": 6700
},
{
"epoch": 2.1711366538952745,
"learning_rate": 1.7105151128139635e-05,
"loss": 0.3874116134643555,
"step": 6800
},
{
"epoch": 2.203065134099617,
"learning_rate": 1.706257982120051e-05,
"loss": 0.387044792175293,
"step": 6900
},
{
"epoch": 2.234993614303959,
"learning_rate": 1.702000851426139e-05,
"loss": 0.39865818023681643,
"step": 7000
},
{
"epoch": 2.2669220945083013,
"learning_rate": 1.6977437207322267e-05,
"loss": 0.3999287414550781,
"step": 7100
},
{
"epoch": 2.2988505747126435,
"learning_rate": 1.6934865900383145e-05,
"loss": 0.4086351013183594,
"step": 7200
},
{
"epoch": 2.330779054916986,
"learning_rate": 1.689229459344402e-05,
"loss": 0.3991414642333984,
"step": 7300
},
{
"epoch": 2.362707535121328,
"learning_rate": 1.68497232865049e-05,
"loss": 0.3910430145263672,
"step": 7400
},
{
"epoch": 2.3946360153256707,
"learning_rate": 1.6807151979565773e-05,
"loss": 0.39477340698242186,
"step": 7500
},
{
"epoch": 2.4265644955300125,
"learning_rate": 1.676458067262665e-05,
"loss": 0.38976318359375,
"step": 7600
},
{
"epoch": 2.458492975734355,
"learning_rate": 1.6722009365687527e-05,
"loss": 0.38246253967285154,
"step": 7700
},
{
"epoch": 2.4904214559386975,
"learning_rate": 1.6679438058748405e-05,
"loss": 0.38488063812255857,
"step": 7800
},
{
"epoch": 2.5223499361430397,
"learning_rate": 1.6636866751809283e-05,
"loss": 0.3952900695800781,
"step": 7900
},
{
"epoch": 2.554278416347382,
"learning_rate": 1.6594295444870158e-05,
"loss": 0.3940334701538086,
"step": 8000
},
{
"epoch": 2.586206896551724,
"learning_rate": 1.6551724137931037e-05,
"loss": 0.3789644622802734,
"step": 8100
},
{
"epoch": 2.6181353767560664,
"learning_rate": 1.650915283099191e-05,
"loss": 0.38175716400146487,
"step": 8200
},
{
"epoch": 2.6500638569604087,
"learning_rate": 1.646658152405279e-05,
"loss": 0.3957417678833008,
"step": 8300
},
{
"epoch": 2.681992337164751,
"learning_rate": 1.6424010217113665e-05,
"loss": 0.3931695556640625,
"step": 8400
},
{
"epoch": 2.713920817369093,
"learning_rate": 1.6381438910174543e-05,
"loss": 0.3700098419189453,
"step": 8500
},
{
"epoch": 2.7458492975734354,
"learning_rate": 1.6338867603235418e-05,
"loss": 0.3986296463012695,
"step": 8600
},
{
"epoch": 2.7777777777777777,
"learning_rate": 1.6296296296296297e-05,
"loss": 0.377045783996582,
"step": 8700
},
{
"epoch": 2.80970625798212,
"learning_rate": 1.6253724989357175e-05,
"loss": 0.38455604553222655,
"step": 8800
},
{
"epoch": 2.841634738186462,
"learning_rate": 1.6211153682418053e-05,
"loss": 0.39248775482177733,
"step": 8900
},
{
"epoch": 2.873563218390805,
"learning_rate": 1.6168582375478928e-05,
"loss": 0.3952408599853516,
"step": 9000
},
{
"epoch": 2.9054916985951467,
"learning_rate": 1.6126011068539807e-05,
"loss": 0.38378463745117186,
"step": 9100
},
{
"epoch": 2.9374201787994894,
"learning_rate": 1.608343976160068e-05,
"loss": 0.3779494857788086,
"step": 9200
},
{
"epoch": 2.969348659003831,
"learning_rate": 1.604086845466156e-05,
"loss": 0.39154972076416017,
"step": 9300
},
{
"epoch": 3.0,
"eval_loss": 0.3532629609107971,
"step": 9396
},
{
"epoch": 3.001277139208174,
"learning_rate": 1.599829714772244e-05,
"loss": 0.38505748748779295,
"step": 9400
},
{
"epoch": 3.033205619412516,
"learning_rate": 1.5955725840783313e-05,
"loss": 0.37334869384765623,
"step": 9500
},
{
"epoch": 3.0651340996168583,
"learning_rate": 1.591315453384419e-05,
"loss": 0.36475982666015627,
"step": 9600
},
{
"epoch": 3.0970625798212006,
"learning_rate": 1.5870583226905067e-05,
"loss": 0.3732810592651367,
"step": 9700
},
{
"epoch": 3.128991060025543,
"learning_rate": 1.5828011919965945e-05,
"loss": 0.36443687438964845,
"step": 9800
},
{
"epoch": 3.160919540229885,
"learning_rate": 1.578544061302682e-05,
"loss": 0.38134773254394533,
"step": 9900
},
{
"epoch": 3.1928480204342273,
"learning_rate": 1.57428693060877e-05,
"loss": 0.3660939407348633,
"step": 10000
},
{
"epoch": 3.2247765006385696,
"learning_rate": 1.5700297999148573e-05,
"loss": 0.38626991271972655,
"step": 10100
},
{
"epoch": 3.256704980842912,
"learning_rate": 1.565772669220945e-05,
"loss": 0.36326351165771487,
"step": 10200
},
{
"epoch": 3.288633461047254,
"learning_rate": 1.561515538527033e-05,
"loss": 0.3856014633178711,
"step": 10300
},
{
"epoch": 3.3205619412515963,
"learning_rate": 1.5572584078331205e-05,
"loss": 0.38314430236816405,
"step": 10400
},
{
"epoch": 3.3524904214559386,
"learning_rate": 1.5530012771392083e-05,
"loss": 0.3787594223022461,
"step": 10500
},
{
"epoch": 3.384418901660281,
"learning_rate": 1.5487441464452958e-05,
"loss": 0.37935165405273436,
"step": 10600
},
{
"epoch": 3.416347381864623,
"learning_rate": 1.5444870157513837e-05,
"loss": 0.3672695541381836,
"step": 10700
},
{
"epoch": 3.4482758620689653,
"learning_rate": 1.540229885057471e-05,
"loss": 0.3799928283691406,
"step": 10800
},
{
"epoch": 3.480204342273308,
"learning_rate": 1.5359727543635593e-05,
"loss": 0.381710090637207,
"step": 10900
},
{
"epoch": 3.51213282247765,
"learning_rate": 1.531715623669647e-05,
"loss": 0.35085960388183596,
"step": 11000
},
{
"epoch": 3.5440613026819925,
"learning_rate": 1.5274584929757347e-05,
"loss": 0.37504680633544923,
"step": 11100
},
{
"epoch": 3.5759897828863347,
"learning_rate": 1.5232013622818223e-05,
"loss": 0.36862171173095704,
"step": 11200
},
{
"epoch": 3.607918263090677,
"learning_rate": 1.51894423158791e-05,
"loss": 0.3727375793457031,
"step": 11300
},
{
"epoch": 3.6398467432950192,
"learning_rate": 1.5146871008939977e-05,
"loss": 0.37135467529296873,
"step": 11400
},
{
"epoch": 3.6717752234993615,
"learning_rate": 1.5104299702000853e-05,
"loss": 0.3693832778930664,
"step": 11500
},
{
"epoch": 3.7037037037037037,
"learning_rate": 1.506172839506173e-05,
"loss": 0.36289344787597655,
"step": 11600
},
{
"epoch": 3.735632183908046,
"learning_rate": 1.5019157088122607e-05,
"loss": 0.3692543411254883,
"step": 11700
},
{
"epoch": 3.767560664112388,
"learning_rate": 1.4976585781183483e-05,
"loss": 0.3623777770996094,
"step": 11800
},
{
"epoch": 3.7994891443167305,
"learning_rate": 1.493401447424436e-05,
"loss": 0.3629803848266602,
"step": 11900
},
{
"epoch": 3.8314176245210727,
"learning_rate": 1.4891443167305237e-05,
"loss": 0.3619497680664063,
"step": 12000
},
{
"epoch": 3.863346104725415,
"learning_rate": 1.4848871860366115e-05,
"loss": 0.35669734954833987,
"step": 12100
},
{
"epoch": 3.895274584929757,
"learning_rate": 1.4806300553426992e-05,
"loss": 0.359210205078125,
"step": 12200
},
{
"epoch": 3.9272030651340994,
"learning_rate": 1.4763729246487868e-05,
"loss": 0.3655078887939453,
"step": 12300
},
{
"epoch": 3.959131545338442,
"learning_rate": 1.4721157939548745e-05,
"loss": 0.3666878890991211,
"step": 12400
},
{
"epoch": 3.991060025542784,
"learning_rate": 1.4678586632609622e-05,
"loss": 0.3504248809814453,
"step": 12500
},
{
"epoch": 4.0,
"eval_loss": 0.3464440107345581,
"step": 12528
},
{
"epoch": 4.022988505747127,
"learning_rate": 1.4636015325670498e-05,
"loss": 0.35299652099609374,
"step": 12600
},
{
"epoch": 4.054916985951468,
"learning_rate": 1.4593444018731375e-05,
"loss": 0.3569066619873047,
"step": 12700
},
{
"epoch": 4.086845466155811,
"learning_rate": 1.4550872711792252e-05,
"loss": 0.3493093490600586,
"step": 12800
},
{
"epoch": 4.118773946360153,
"learning_rate": 1.4508301404853132e-05,
"loss": 0.3634659957885742,
"step": 12900
},
{
"epoch": 4.150702426564496,
"learning_rate": 1.4465730097914008e-05,
"loss": 0.37093032836914064,
"step": 13000
},
{
"epoch": 4.182630906768837,
"learning_rate": 1.4423158790974885e-05,
"loss": 0.37300064086914064,
"step": 13100
},
{
"epoch": 4.21455938697318,
"learning_rate": 1.4380587484035762e-05,
"loss": 0.358408203125,
"step": 13200
},
{
"epoch": 4.246487867177523,
"learning_rate": 1.4338016177096638e-05,
"loss": 0.3569377517700195,
"step": 13300
},
{
"epoch": 4.278416347381865,
"learning_rate": 1.4295444870157515e-05,
"loss": 0.3640250015258789,
"step": 13400
},
{
"epoch": 4.310344827586207,
"learning_rate": 1.4252873563218392e-05,
"loss": 0.3725627517700195,
"step": 13500
},
{
"epoch": 4.342273307790549,
"learning_rate": 1.421030225627927e-05,
"loss": 0.3584425354003906,
"step": 13600
},
{
"epoch": 4.374201787994892,
"learning_rate": 1.4167730949340147e-05,
"loss": 0.37500988006591796,
"step": 13700
},
{
"epoch": 4.406130268199234,
"learning_rate": 1.4125159642401023e-05,
"loss": 0.35521396636962893,
"step": 13800
},
{
"epoch": 4.438058748403576,
"learning_rate": 1.40825883354619e-05,
"loss": 0.35183258056640626,
"step": 13900
},
{
"epoch": 4.469987228607918,
"learning_rate": 1.4040017028522777e-05,
"loss": 0.35960380554199217,
"step": 14000
},
{
"epoch": 4.501915708812261,
"learning_rate": 1.3997445721583653e-05,
"loss": 0.3571435546875,
"step": 14100
},
{
"epoch": 4.533844189016603,
"learning_rate": 1.395487441464453e-05,
"loss": 0.35867115020751955,
"step": 14200
},
{
"epoch": 4.565772669220945,
"learning_rate": 1.3912303107705407e-05,
"loss": 0.34017555236816405,
"step": 14300
},
{
"epoch": 4.597701149425287,
"learning_rate": 1.3869731800766283e-05,
"loss": 0.3735758209228516,
"step": 14400
},
{
"epoch": 4.62962962962963,
"learning_rate": 1.3827160493827162e-05,
"loss": 0.36705623626708983,
"step": 14500
},
{
"epoch": 4.661558109833972,
"learning_rate": 1.3784589186888038e-05,
"loss": 0.3525060272216797,
"step": 14600
},
{
"epoch": 4.693486590038314,
"learning_rate": 1.3742017879948915e-05,
"loss": 0.3783878326416016,
"step": 14700
},
{
"epoch": 4.725415070242656,
"learning_rate": 1.3699446573009792e-05,
"loss": 0.3659477615356445,
"step": 14800
},
{
"epoch": 4.757343550446999,
"learning_rate": 1.365687526607067e-05,
"loss": 0.3537181091308594,
"step": 14900
},
{
"epoch": 4.789272030651341,
"learning_rate": 1.3614303959131547e-05,
"loss": 0.36351833343505857,
"step": 15000
},
{
"epoch": 4.821200510855683,
"learning_rate": 1.3571732652192425e-05,
"loss": 0.3483928298950195,
"step": 15100
},
{
"epoch": 4.853128991060025,
"learning_rate": 1.3529161345253302e-05,
"loss": 0.35997581481933594,
"step": 15200
},
{
"epoch": 4.885057471264368,
"learning_rate": 1.3486590038314178e-05,
"loss": 0.36364505767822264,
"step": 15300
},
{
"epoch": 4.91698595146871,
"learning_rate": 1.3444018731375055e-05,
"loss": 0.3437914276123047,
"step": 15400
},
{
"epoch": 4.948914431673052,
"learning_rate": 1.3401447424435932e-05,
"loss": 0.3517444610595703,
"step": 15500
},
{
"epoch": 4.980842911877395,
"learning_rate": 1.3358876117496808e-05,
"loss": 0.3623521041870117,
"step": 15600
},
{
"epoch": 5.0,
"eval_loss": 0.33480075001716614,
"step": 15660
},
{
"epoch": 5.012771392081737,
"learning_rate": 1.3316304810557685e-05,
"loss": 0.3527105712890625,
"step": 15700
},
{
"epoch": 5.044699872286079,
"learning_rate": 1.3273733503618562e-05,
"loss": 0.3470969009399414,
"step": 15800
},
{
"epoch": 5.076628352490421,
"learning_rate": 1.3231162196679438e-05,
"loss": 0.34361572265625,
"step": 15900
},
{
"epoch": 5.108556832694764,
"learning_rate": 1.3188590889740317e-05,
"loss": 0.35975650787353514,
"step": 16000
},
{
"epoch": 5.140485312899106,
"learning_rate": 1.3146019582801193e-05,
"loss": 0.34139663696289063,
"step": 16100
},
{
"epoch": 5.172413793103448,
"learning_rate": 1.310344827586207e-05,
"loss": 0.34681896209716795,
"step": 16200
},
{
"epoch": 5.20434227330779,
"learning_rate": 1.3060876968922947e-05,
"loss": 0.34409351348876954,
"step": 16300
},
{
"epoch": 5.236270753512133,
"learning_rate": 1.3018305661983823e-05,
"loss": 0.3398979949951172,
"step": 16400
},
{
"epoch": 5.268199233716475,
"learning_rate": 1.29757343550447e-05,
"loss": 0.3437363815307617,
"step": 16500
},
{
"epoch": 5.300127713920817,
"learning_rate": 1.2933163048105577e-05,
"loss": 0.33456321716308596,
"step": 16600
},
{
"epoch": 5.33205619412516,
"learning_rate": 1.2890591741166453e-05,
"loss": 0.3437419128417969,
"step": 16700
},
{
"epoch": 5.363984674329502,
"learning_rate": 1.2848020434227333e-05,
"loss": 0.3656898880004883,
"step": 16800
},
{
"epoch": 5.395913154533845,
"learning_rate": 1.280544912728821e-05,
"loss": 0.33624221801757814,
"step": 16900
},
{
"epoch": 5.427841634738186,
"learning_rate": 1.2762877820349087e-05,
"loss": 0.35640827178955076,
"step": 17000
},
{
"epoch": 5.459770114942529,
"learning_rate": 1.2720306513409963e-05,
"loss": 0.34689849853515625,
"step": 17100
},
{
"epoch": 5.491698595146871,
"learning_rate": 1.267773520647084e-05,
"loss": 0.3492275238037109,
"step": 17200
},
{
"epoch": 5.5236270753512136,
"learning_rate": 1.2635163899531717e-05,
"loss": 0.3490084457397461,
"step": 17300
},
{
"epoch": 5.555555555555555,
"learning_rate": 1.2592592592592593e-05,
"loss": 0.33231266021728517,
"step": 17400
},
{
"epoch": 5.587484035759898,
"learning_rate": 1.2550021285653472e-05,
"loss": 0.3501285552978516,
"step": 17500
},
{
"epoch": 5.61941251596424,
"learning_rate": 1.2507449978714348e-05,
"loss": 0.3469379425048828,
"step": 17600
},
{
"epoch": 5.6513409961685825,
"learning_rate": 1.2464878671775225e-05,
"loss": 0.3600951766967773,
"step": 17700
},
{
"epoch": 5.683269476372924,
"learning_rate": 1.2422307364836102e-05,
"loss": 0.3439628982543945,
"step": 17800
},
{
"epoch": 5.715197956577267,
"learning_rate": 1.2379736057896978e-05,
"loss": 0.34368560791015623,
"step": 17900
},
{
"epoch": 5.747126436781609,
"learning_rate": 1.2337164750957855e-05,
"loss": 0.3624436950683594,
"step": 18000
},
{
"epoch": 5.7790549169859515,
"learning_rate": 1.2294593444018732e-05,
"loss": 0.3493986129760742,
"step": 18100
},
{
"epoch": 5.810983397190293,
"learning_rate": 1.2252022137079608e-05,
"loss": 0.33745758056640623,
"step": 18200
},
{
"epoch": 5.842911877394636,
"learning_rate": 1.2209450830140485e-05,
"loss": 0.3514177703857422,
"step": 18300
},
{
"epoch": 5.874840357598979,
"learning_rate": 1.2166879523201363e-05,
"loss": 0.3409608459472656,
"step": 18400
},
{
"epoch": 5.9067688378033205,
"learning_rate": 1.212430821626224e-05,
"loss": 0.3416262054443359,
"step": 18500
},
{
"epoch": 5.938697318007663,
"learning_rate": 1.2081736909323117e-05,
"loss": 0.33906620025634765,
"step": 18600
},
{
"epoch": 5.970625798212005,
"learning_rate": 1.2039165602383993e-05,
"loss": 0.3565713119506836,
"step": 18700
},
{
"epoch": 6.0,
"eval_loss": 0.32608646154403687,
"step": 18792
},
{
"epoch": 6.002554278416348,
"learning_rate": 1.1996594295444872e-05,
"loss": 0.3423441314697266,
"step": 18800
},
{
"epoch": 6.0344827586206895,
"learning_rate": 1.1954022988505748e-05,
"loss": 0.3439017105102539,
"step": 18900
},
{
"epoch": 6.066411238825032,
"learning_rate": 1.1911451681566627e-05,
"loss": 0.33277828216552735,
"step": 19000
},
{
"epoch": 6.098339719029374,
"learning_rate": 1.1868880374627503e-05,
"loss": 0.3349918365478516,
"step": 19100
},
{
"epoch": 6.130268199233717,
"learning_rate": 1.182630906768838e-05,
"loss": 0.3411783981323242,
"step": 19200
},
{
"epoch": 6.1621966794380585,
"learning_rate": 1.1783737760749257e-05,
"loss": 0.34938507080078124,
"step": 19300
},
{
"epoch": 6.194125159642401,
"learning_rate": 1.1741166453810133e-05,
"loss": 0.3302972412109375,
"step": 19400
},
{
"epoch": 6.226053639846743,
"learning_rate": 1.169859514687101e-05,
"loss": 0.3324479293823242,
"step": 19500
},
{
"epoch": 6.257982120051086,
"learning_rate": 1.1656023839931887e-05,
"loss": 0.3325730133056641,
"step": 19600
},
{
"epoch": 6.2899106002554275,
"learning_rate": 1.1613452532992763e-05,
"loss": 0.32390750885009767,
"step": 19700
},
{
"epoch": 6.32183908045977,
"learning_rate": 1.157088122605364e-05,
"loss": 0.3410587692260742,
"step": 19800
},
{
"epoch": 6.353767560664112,
"learning_rate": 1.1528309919114518e-05,
"loss": 0.33700084686279297,
"step": 19900
},
{
"epoch": 6.385696040868455,
"learning_rate": 1.1485738612175395e-05,
"loss": 0.343173942565918,
"step": 20000
},
{
"epoch": 6.417624521072797,
"learning_rate": 1.1443167305236272e-05,
"loss": 0.34580535888671876,
"step": 20100
},
{
"epoch": 6.449553001277139,
"learning_rate": 1.1400595998297148e-05,
"loss": 0.3282489395141602,
"step": 20200
},
{
"epoch": 6.481481481481482,
"learning_rate": 1.1358024691358025e-05,
"loss": 0.32179367065429687,
"step": 20300
},
{
"epoch": 6.513409961685824,
"learning_rate": 1.1315453384418902e-05,
"loss": 0.34018295288085937,
"step": 20400
},
{
"epoch": 6.545338441890166,
"learning_rate": 1.1272882077479778e-05,
"loss": 0.3345378494262695,
"step": 20500
},
{
"epoch": 6.577266922094508,
"learning_rate": 1.1230310770540655e-05,
"loss": 0.34897972106933595,
"step": 20600
},
{
"epoch": 6.609195402298851,
"learning_rate": 1.1187739463601532e-05,
"loss": 0.3494709014892578,
"step": 20700
},
{
"epoch": 6.641123882503193,
"learning_rate": 1.1145168156662412e-05,
"loss": 0.3467377090454102,
"step": 20800
},
{
"epoch": 6.673052362707535,
"learning_rate": 1.1102596849723288e-05,
"loss": 0.3386357116699219,
"step": 20900
},
{
"epoch": 6.704980842911877,
"learning_rate": 1.1060025542784165e-05,
"loss": 0.3346070098876953,
"step": 21000
},
{
"epoch": 6.73690932311622,
"learning_rate": 1.1017454235845042e-05,
"loss": 0.3389591598510742,
"step": 21100
},
{
"epoch": 6.768837803320562,
"learning_rate": 1.0974882928905918e-05,
"loss": 0.33276222229003904,
"step": 21200
},
{
"epoch": 6.800766283524904,
"learning_rate": 1.0932311621966795e-05,
"loss": 0.3417454528808594,
"step": 21300
},
{
"epoch": 6.832694763729246,
"learning_rate": 1.0889740315027673e-05,
"loss": 0.3352804183959961,
"step": 21400
},
{
"epoch": 6.864623243933589,
"learning_rate": 1.084716900808855e-05,
"loss": 0.341392822265625,
"step": 21500
},
{
"epoch": 6.896551724137931,
"learning_rate": 1.0804597701149427e-05,
"loss": 0.3257337188720703,
"step": 21600
},
{
"epoch": 6.928480204342273,
"learning_rate": 1.0762026394210303e-05,
"loss": 0.340169792175293,
"step": 21700
},
{
"epoch": 6.960408684546616,
"learning_rate": 1.071945508727118e-05,
"loss": 0.3393547821044922,
"step": 21800
},
{
"epoch": 6.992337164750958,
"learning_rate": 1.0676883780332057e-05,
"loss": 0.3345566940307617,
"step": 21900
},
{
"epoch": 7.0,
"eval_loss": 0.3236748278141022,
"step": 21924
},
{
"epoch": 7.0242656449553005,
"learning_rate": 1.0634312473392933e-05,
"loss": 0.341474609375,
"step": 22000
},
{
"epoch": 7.056194125159642,
"learning_rate": 1.059174116645381e-05,
"loss": 0.33665103912353517,
"step": 22100
},
{
"epoch": 7.088122605363985,
"learning_rate": 1.0549169859514687e-05,
"loss": 0.33666282653808594,
"step": 22200
},
{
"epoch": 7.120051085568327,
"learning_rate": 1.0506598552575565e-05,
"loss": 0.32267608642578127,
"step": 22300
},
{
"epoch": 7.1519795657726695,
"learning_rate": 1.0464027245636442e-05,
"loss": 0.32230213165283206,
"step": 22400
},
{
"epoch": 7.183908045977011,
"learning_rate": 1.0421455938697318e-05,
"loss": 0.3335090637207031,
"step": 22500
},
{
"epoch": 7.215836526181354,
"learning_rate": 1.0378884631758195e-05,
"loss": 0.3402565002441406,
"step": 22600
},
{
"epoch": 7.247765006385696,
"learning_rate": 1.0336313324819072e-05,
"loss": 0.33222633361816406,
"step": 22700
},
{
"epoch": 7.2796934865900385,
"learning_rate": 1.029374201787995e-05,
"loss": 0.3273036956787109,
"step": 22800
},
{
"epoch": 7.31162196679438,
"learning_rate": 1.0251170710940828e-05,
"loss": 0.34040824890136717,
"step": 22900
},
{
"epoch": 7.343550446998723,
"learning_rate": 1.0208599404001705e-05,
"loss": 0.33351837158203124,
"step": 23000
},
{
"epoch": 7.375478927203065,
"learning_rate": 1.0166028097062582e-05,
"loss": 0.3304040145874023,
"step": 23100
},
{
"epoch": 7.407407407407407,
"learning_rate": 1.0123456790123458e-05,
"loss": 0.3389059829711914,
"step": 23200
},
{
"epoch": 7.439335887611749,
"learning_rate": 1.0080885483184335e-05,
"loss": 0.35038864135742187,
"step": 23300
},
{
"epoch": 7.471264367816092,
"learning_rate": 1.0038314176245212e-05,
"loss": 0.32688159942626954,
"step": 23400
},
{
"epoch": 7.503192848020435,
"learning_rate": 9.995742869306088e-06,
"loss": 0.3443561935424805,
"step": 23500
},
{
"epoch": 7.535121328224776,
"learning_rate": 9.953171562366965e-06,
"loss": 0.33195884704589845,
"step": 23600
},
{
"epoch": 7.567049808429119,
"learning_rate": 9.910600255427842e-06,
"loss": 0.3439883041381836,
"step": 23700
},
{
"epoch": 7.598978288633461,
"learning_rate": 9.86802894848872e-06,
"loss": 0.32301868438720704,
"step": 23800
},
{
"epoch": 7.630906768837804,
"learning_rate": 9.825457641549597e-06,
"loss": 0.3437799072265625,
"step": 23900
},
{
"epoch": 7.662835249042145,
"learning_rate": 9.782886334610473e-06,
"loss": 0.3296714401245117,
"step": 24000
},
{
"epoch": 7.694763729246488,
"learning_rate": 9.74031502767135e-06,
"loss": 0.3370498275756836,
"step": 24100
},
{
"epoch": 7.72669220945083,
"learning_rate": 9.697743720732228e-06,
"loss": 0.3219729232788086,
"step": 24200
},
{
"epoch": 7.758620689655173,
"learning_rate": 9.655172413793105e-06,
"loss": 0.3258438491821289,
"step": 24300
},
{
"epoch": 7.790549169859514,
"learning_rate": 9.612601106853982e-06,
"loss": 0.33826839447021484,
"step": 24400
},
{
"epoch": 7.822477650063857,
"learning_rate": 9.570029799914858e-06,
"loss": 0.3263700866699219,
"step": 24500
},
{
"epoch": 7.854406130268199,
"learning_rate": 9.527458492975735e-06,
"loss": 0.3350722122192383,
"step": 24600
},
{
"epoch": 7.886334610472542,
"learning_rate": 9.484887186036612e-06,
"loss": 0.334184455871582,
"step": 24700
},
{
"epoch": 7.918263090676884,
"learning_rate": 9.442315879097488e-06,
"loss": 0.3267938232421875,
"step": 24800
},
{
"epoch": 7.950191570881226,
"learning_rate": 9.399744572158365e-06,
"loss": 0.3196305465698242,
"step": 24900
},
{
"epoch": 7.982120051085568,
"learning_rate": 9.357173265219243e-06,
"loss": 0.3230043029785156,
"step": 25000
},
{
"epoch": 8.0,
"eval_loss": 0.32138851284980774,
"step": 25056
},
{
"epoch": 8.01404853128991,
"learning_rate": 9.31460195828012e-06,
"loss": 0.3258121109008789,
"step": 25100
},
{
"epoch": 8.045977011494253,
"learning_rate": 9.272030651340997e-06,
"loss": 0.32868377685546873,
"step": 25200
},
{
"epoch": 8.077905491698596,
"learning_rate": 9.229459344401875e-06,
"loss": 0.31930324554443357,
"step": 25300
},
{
"epoch": 8.109833971902937,
"learning_rate": 9.186888037462752e-06,
"loss": 0.3216040420532227,
"step": 25400
},
{
"epoch": 8.14176245210728,
"learning_rate": 9.144316730523628e-06,
"loss": 0.3392459869384766,
"step": 25500
},
{
"epoch": 8.173690932311622,
"learning_rate": 9.101745423584505e-06,
"loss": 0.3244002914428711,
"step": 25600
},
{
"epoch": 8.205619412515965,
"learning_rate": 9.059174116645382e-06,
"loss": 0.329796142578125,
"step": 25700
},
{
"epoch": 8.237547892720306,
"learning_rate": 9.016602809706258e-06,
"loss": 0.310886344909668,
"step": 25800
},
{
"epoch": 8.269476372924649,
"learning_rate": 8.974031502767135e-06,
"loss": 0.3182815361022949,
"step": 25900
},
{
"epoch": 8.301404853128991,
"learning_rate": 8.931460195828012e-06,
"loss": 0.3267630386352539,
"step": 26000
},
{
"epoch": 8.333333333333334,
"learning_rate": 8.888888888888888e-06,
"loss": 0.31576622009277344,
"step": 26100
},
{
"epoch": 8.365261813537675,
"learning_rate": 8.846317581949767e-06,
"loss": 0.33119094848632813,
"step": 26200
},
{
"epoch": 8.397190293742018,
"learning_rate": 8.803746275010643e-06,
"loss": 0.3301513671875,
"step": 26300
},
{
"epoch": 8.42911877394636,
"learning_rate": 8.76117496807152e-06,
"loss": 0.3321194839477539,
"step": 26400
},
{
"epoch": 8.461047254150703,
"learning_rate": 8.718603661132398e-06,
"loss": 0.332429313659668,
"step": 26500
},
{
"epoch": 8.492975734355046,
"learning_rate": 8.676032354193275e-06,
"loss": 0.3258559036254883,
"step": 26600
},
{
"epoch": 8.524904214559387,
"learning_rate": 8.633461047254152e-06,
"loss": 0.32335933685302737,
"step": 26700
},
{
"epoch": 8.55683269476373,
"learning_rate": 8.590889740315028e-06,
"loss": 0.3441028594970703,
"step": 26800
},
{
"epoch": 8.588761174968072,
"learning_rate": 8.548318433375905e-06,
"loss": 0.3302944564819336,
"step": 26900
},
{
"epoch": 8.620689655172415,
"learning_rate": 8.505747126436782e-06,
"loss": 0.3203293228149414,
"step": 27000
},
{
"epoch": 8.652618135376756,
"learning_rate": 8.463175819497658e-06,
"loss": 0.3180072593688965,
"step": 27100
},
{
"epoch": 8.684546615581098,
"learning_rate": 8.420604512558537e-06,
"loss": 0.3143235015869141,
"step": 27200
},
{
"epoch": 8.71647509578544,
"learning_rate": 8.378033205619413e-06,
"loss": 0.31695037841796875,
"step": 27300
},
{
"epoch": 8.748403575989784,
"learning_rate": 8.33546189868029e-06,
"loss": 0.31933542251586916,
"step": 27400
},
{
"epoch": 8.780332056194124,
"learning_rate": 8.292890591741167e-06,
"loss": 0.3094404983520508,
"step": 27500
},
{
"epoch": 8.812260536398467,
"learning_rate": 8.250319284802043e-06,
"loss": 0.3251906204223633,
"step": 27600
},
{
"epoch": 8.84418901660281,
"learning_rate": 8.207747977862922e-06,
"loss": 0.31949323654174805,
"step": 27700
},
{
"epoch": 8.876117496807153,
"learning_rate": 8.165176670923798e-06,
"loss": 0.32407459259033206,
"step": 27800
},
{
"epoch": 8.908045977011493,
"learning_rate": 8.122605363984675e-06,
"loss": 0.34080780029296875,
"step": 27900
},
{
"epoch": 8.939974457215836,
"learning_rate": 8.080034057045552e-06,
"loss": 0.3133597183227539,
"step": 28000
},
{
"epoch": 8.971902937420179,
"learning_rate": 8.037462750106428e-06,
"loss": 0.31053606033325193,
"step": 28100
},
{
"epoch": 9.0,
"eval_loss": 0.3119257688522339,
"step": 28188
},
{
"epoch": 9.003831417624522,
"learning_rate": 7.994891443167307e-06,
"loss": 0.3158924293518066,
"step": 28200
},
{
"epoch": 9.035759897828862,
"learning_rate": 7.952320136228183e-06,
"loss": 0.3117160987854004,
"step": 28300
},
{
"epoch": 9.067688378033205,
"learning_rate": 7.90974882928906e-06,
"loss": 0.32216869354248046,
"step": 28400
},
{
"epoch": 9.099616858237548,
"learning_rate": 7.867177522349937e-06,
"loss": 0.314890079498291,
"step": 28500
},
{
"epoch": 9.13154533844189,
"learning_rate": 7.824606215410813e-06,
"loss": 0.3113277626037598,
"step": 28600
},
{
"epoch": 9.163473818646233,
"learning_rate": 7.78203490847169e-06,
"loss": 0.324998779296875,
"step": 28700
},
{
"epoch": 9.195402298850574,
"learning_rate": 7.739463601532567e-06,
"loss": 0.32509784698486327,
"step": 28800
},
{
"epoch": 9.227330779054917,
"learning_rate": 7.696892294593445e-06,
"loss": 0.3238474273681641,
"step": 28900
},
{
"epoch": 9.25925925925926,
"learning_rate": 7.654320987654322e-06,
"loss": 0.33168052673339843,
"step": 29000
},
{
"epoch": 9.291187739463602,
"learning_rate": 7.611749680715198e-06,
"loss": 0.3050485992431641,
"step": 29100
},
{
"epoch": 9.323116219667943,
"learning_rate": 7.569178373776076e-06,
"loss": 0.31826154708862303,
"step": 29200
},
{
"epoch": 9.355044699872286,
"learning_rate": 7.5266070668369525e-06,
"loss": 0.31972583770751956,
"step": 29300
},
{
"epoch": 9.386973180076629,
"learning_rate": 7.48403575989783e-06,
"loss": 0.3287076568603516,
"step": 29400
},
{
"epoch": 9.418901660280971,
"learning_rate": 7.441464452958707e-06,
"loss": 0.30761892318725587,
"step": 29500
},
{
"epoch": 9.450830140485312,
"learning_rate": 7.398893146019583e-06,
"loss": 0.3097171401977539,
"step": 29600
},
{
"epoch": 9.482758620689655,
"learning_rate": 7.35632183908046e-06,
"loss": 0.30806644439697267,
"step": 29700
},
{
"epoch": 9.514687100893997,
"learning_rate": 7.3137505321413375e-06,
"loss": 0.3174296760559082,
"step": 29800
},
{
"epoch": 9.54661558109834,
"learning_rate": 7.271179225202214e-06,
"loss": 0.31353973388671874,
"step": 29900
},
{
"epoch": 9.578544061302683,
"learning_rate": 7.228607918263091e-06,
"loss": 0.30578601837158204,
"step": 30000
},
{
"epoch": 9.610472541507024,
"learning_rate": 7.1860366113239675e-06,
"loss": 0.3045210838317871,
"step": 30100
},
{
"epoch": 9.642401021711366,
"learning_rate": 7.143465304384846e-06,
"loss": 0.3149559211730957,
"step": 30200
},
{
"epoch": 9.67432950191571,
"learning_rate": 7.1008939974457225e-06,
"loss": 0.3207520294189453,
"step": 30300
},
{
"epoch": 9.706257982120052,
"learning_rate": 7.058322690506599e-06,
"loss": 0.32308254241943357,
"step": 30400
},
{
"epoch": 9.738186462324393,
"learning_rate": 7.015751383567476e-06,
"loss": 0.3183433723449707,
"step": 30500
},
{
"epoch": 9.770114942528735,
"learning_rate": 6.973180076628353e-06,
"loss": 0.3200767135620117,
"step": 30600
},
{
"epoch": 9.802043422733078,
"learning_rate": 6.93060876968923e-06,
"loss": 0.3113987922668457,
"step": 30700
},
{
"epoch": 9.83397190293742,
"learning_rate": 6.888037462750107e-06,
"loss": 0.33167327880859376,
"step": 30800
},
{
"epoch": 9.865900383141762,
"learning_rate": 6.845466155810983e-06,
"loss": 0.3172581100463867,
"step": 30900
},
{
"epoch": 9.897828863346104,
"learning_rate": 6.802894848871861e-06,
"loss": 0.3124402046203613,
"step": 31000
},
{
"epoch": 9.929757343550447,
"learning_rate": 6.760323541932738e-06,
"loss": 0.30687171936035157,
"step": 31100
},
{
"epoch": 9.96168582375479,
"learning_rate": 6.717752234993615e-06,
"loss": 0.304738712310791,
"step": 31200
},
{
"epoch": 9.99361430395913,
"learning_rate": 6.6751809280544925e-06,
"loss": 0.3069480514526367,
"step": 31300
},
{
"epoch": 10.0,
"eval_loss": 0.31222018599510193,
"step": 31320
},
{
"epoch": 10.025542784163473,
"learning_rate": 6.632609621115369e-06,
"loss": 0.30982017517089844,
"step": 31400
},
{
"epoch": 10.057471264367816,
"learning_rate": 6.590038314176246e-06,
"loss": 0.29576118469238283,
"step": 31500
},
{
"epoch": 10.089399744572159,
"learning_rate": 6.5474670072371225e-06,
"loss": 0.318297004699707,
"step": 31600
},
{
"epoch": 10.121328224776502,
"learning_rate": 6.504895700297999e-06,
"loss": 0.2947650337219238,
"step": 31700
},
{
"epoch": 10.153256704980842,
"learning_rate": 6.462324393358877e-06,
"loss": 0.31912431716918943,
"step": 31800
},
{
"epoch": 10.185185185185185,
"learning_rate": 6.419753086419753e-06,
"loss": 0.31549993515014646,
"step": 31900
},
{
"epoch": 10.217113665389528,
"learning_rate": 6.37718177948063e-06,
"loss": 0.3185459327697754,
"step": 32000
},
{
"epoch": 10.24904214559387,
"learning_rate": 6.334610472541508e-06,
"loss": 0.3061997413635254,
"step": 32100
},
{
"epoch": 10.280970625798211,
"learning_rate": 6.292039165602385e-06,
"loss": 0.30356922149658205,
"step": 32200
},
{
"epoch": 10.312899106002554,
"learning_rate": 6.249467858663262e-06,
"loss": 0.30677152633666993,
"step": 32300
},
{
"epoch": 10.344827586206897,
"learning_rate": 6.206896551724138e-06,
"loss": 0.3148806190490723,
"step": 32400
},
{
"epoch": 10.37675606641124,
"learning_rate": 6.164325244785016e-06,
"loss": 0.3114926528930664,
"step": 32500
},
{
"epoch": 10.40868454661558,
"learning_rate": 6.1217539378458925e-06,
"loss": 0.31096630096435546,
"step": 32600
},
{
"epoch": 10.440613026819923,
"learning_rate": 6.079182630906769e-06,
"loss": 0.3131294822692871,
"step": 32700
},
{
"epoch": 10.472541507024266,
"learning_rate": 6.036611323967646e-06,
"loss": 0.31513975143432615,
"step": 32800
},
{
"epoch": 10.504469987228608,
"learning_rate": 5.9940400170285225e-06,
"loss": 0.3035664939880371,
"step": 32900
},
{
"epoch": 10.53639846743295,
"learning_rate": 5.9514687100894e-06,
"loss": 0.30514934539794925,
"step": 33000
},
{
"epoch": 10.568326947637292,
"learning_rate": 5.9088974031502775e-06,
"loss": 0.3236639404296875,
"step": 33100
},
{
"epoch": 10.600255427841635,
"learning_rate": 5.866326096211154e-06,
"loss": 0.32729095458984375,
"step": 33200
},
{
"epoch": 10.632183908045977,
"learning_rate": 5.823754789272032e-06,
"loss": 0.30390745162963867,
"step": 33300
},
{
"epoch": 10.66411238825032,
"learning_rate": 5.781183482332908e-06,
"loss": 0.30320255279541014,
"step": 33400
},
{
"epoch": 10.696040868454661,
"learning_rate": 5.738612175393785e-06,
"loss": 0.31617319107055664,
"step": 33500
},
{
"epoch": 10.727969348659004,
"learning_rate": 5.696040868454662e-06,
"loss": 0.31349088668823244,
"step": 33600
},
{
"epoch": 10.759897828863346,
"learning_rate": 5.653469561515539e-06,
"loss": 0.3055162620544434,
"step": 33700
},
{
"epoch": 10.79182630906769,
"learning_rate": 5.610898254576416e-06,
"loss": 0.29683116912841795,
"step": 33800
},
{
"epoch": 10.82375478927203,
"learning_rate": 5.5683269476372925e-06,
"loss": 0.2972592926025391,
"step": 33900
},
{
"epoch": 10.855683269476373,
"learning_rate": 5.525755640698169e-06,
"loss": 0.30813514709472656,
"step": 34000
},
{
"epoch": 10.887611749680715,
"learning_rate": 5.4831843337590475e-06,
"loss": 0.311496696472168,
"step": 34100
},
{
"epoch": 10.919540229885058,
"learning_rate": 5.440613026819924e-06,
"loss": 0.3147770881652832,
"step": 34200
},
{
"epoch": 10.951468710089399,
"learning_rate": 5.398041719880801e-06,
"loss": 0.2977629852294922,
"step": 34300
},
{
"epoch": 10.983397190293742,
"learning_rate": 5.3554704129416775e-06,
"loss": 0.30722129821777344,
"step": 34400
},
{
"epoch": 11.0,
"eval_loss": 0.30927398800849915,
"step": 34452
},
{
"epoch": 11.015325670498084,
"learning_rate": 5.312899106002555e-06,
"loss": 0.30775304794311525,
"step": 34500
},
{
"epoch": 11.047254150702427,
"learning_rate": 5.270327799063432e-06,
"loss": 0.31844793319702147,
"step": 34600
},
{
"epoch": 11.079182630906768,
"learning_rate": 5.227756492124308e-06,
"loss": 0.3163930511474609,
"step": 34700
},
{
"epoch": 11.11111111111111,
"learning_rate": 5.185185185185185e-06,
"loss": 0.3178179359436035,
"step": 34800
},
{
"epoch": 11.143039591315453,
"learning_rate": 5.1426138782460625e-06,
"loss": 0.3107015609741211,
"step": 34900
},
{
"epoch": 11.174968071519796,
"learning_rate": 5.100042571306939e-06,
"loss": 0.31198001861572267,
"step": 35000
},
{
"epoch": 11.206896551724139,
"learning_rate": 5.057471264367817e-06,
"loss": 0.30287263870239256,
"step": 35100
},
{
"epoch": 11.23882503192848,
"learning_rate": 5.014899957428694e-06,
"loss": 0.3046586036682129,
"step": 35200
},
{
"epoch": 11.270753512132822,
"learning_rate": 4.972328650489571e-06,
"loss": 0.29926385879516604,
"step": 35300
},
{
"epoch": 11.302681992337165,
"learning_rate": 4.9297573435504475e-06,
"loss": 0.31370662689208983,
"step": 35400
},
{
"epoch": 11.334610472541508,
"learning_rate": 4.887186036611324e-06,
"loss": 0.3177505874633789,
"step": 35500
},
{
"epoch": 11.366538952745849,
"learning_rate": 4.844614729672202e-06,
"loss": 0.3028862190246582,
"step": 35600
},
{
"epoch": 11.398467432950191,
"learning_rate": 4.802043422733078e-06,
"loss": 0.2965104293823242,
"step": 35700
},
{
"epoch": 11.430395913154534,
"learning_rate": 4.759472115793956e-06,
"loss": 0.31520273208618166,
"step": 35800
},
{
"epoch": 11.462324393358877,
"learning_rate": 4.7169008088548325e-06,
"loss": 0.2961687469482422,
"step": 35900
},
{
"epoch": 11.494252873563218,
"learning_rate": 4.674329501915709e-06,
"loss": 0.30442037582397463,
"step": 36000
},
{
"epoch": 11.52618135376756,
"learning_rate": 4.631758194976586e-06,
"loss": 0.3178094863891602,
"step": 36100
},
{
"epoch": 11.558109833971903,
"learning_rate": 4.589186888037463e-06,
"loss": 0.3036604118347168,
"step": 36200
},
{
"epoch": 11.590038314176246,
"learning_rate": 4.54661558109834e-06,
"loss": 0.3115557861328125,
"step": 36300
},
{
"epoch": 11.621966794380587,
"learning_rate": 4.5040442741592175e-06,
"loss": 0.30345108032226564,
"step": 36400
},
{
"epoch": 11.65389527458493,
"learning_rate": 4.461472967220094e-06,
"loss": 0.30634918212890627,
"step": 36500
},
{
"epoch": 11.685823754789272,
"learning_rate": 4.418901660280971e-06,
"loss": 0.3051659774780273,
"step": 36600
},
{
"epoch": 11.717752234993615,
"learning_rate": 4.3763303533418475e-06,
"loss": 0.3069002342224121,
"step": 36700
},
{
"epoch": 11.749680715197957,
"learning_rate": 4.333759046402725e-06,
"loss": 0.30947404861450195,
"step": 36800
},
{
"epoch": 11.781609195402298,
"learning_rate": 4.291187739463602e-06,
"loss": 0.3073232650756836,
"step": 36900
},
{
"epoch": 11.813537675606641,
"learning_rate": 4.248616432524479e-06,
"loss": 0.3001542472839355,
"step": 37000
},
{
"epoch": 11.845466155810984,
"learning_rate": 4.206045125585356e-06,
"loss": 0.31570695877075194,
"step": 37100
},
{
"epoch": 11.877394636015326,
"learning_rate": 4.1634738186462325e-06,
"loss": 0.311122932434082,
"step": 37200
},
{
"epoch": 11.909323116219667,
"learning_rate": 4.12090251170711e-06,
"loss": 0.301647891998291,
"step": 37300
},
{
"epoch": 11.94125159642401,
"learning_rate": 4.078331204767987e-06,
"loss": 0.3205462646484375,
"step": 37400
},
{
"epoch": 11.973180076628353,
"learning_rate": 4.035759897828863e-06,
"loss": 0.29651784896850586,
"step": 37500
},
{
"epoch": 12.0,
"eval_loss": 0.3090454041957855,
"step": 37584
},
{
"epoch": 12.005108556832695,
"learning_rate": 3.993188590889741e-06,
"loss": 0.31057783126831057,
"step": 37600
},
{
"epoch": 12.037037037037036,
"learning_rate": 3.9506172839506175e-06,
"loss": 0.305908145904541,
"step": 37700
},
{
"epoch": 12.068965517241379,
"learning_rate": 3.908045977011495e-06,
"loss": 0.3145115280151367,
"step": 37800
},
{
"epoch": 12.100893997445722,
"learning_rate": 3.865474670072372e-06,
"loss": 0.3086430168151855,
"step": 37900
},
{
"epoch": 12.132822477650064,
"learning_rate": 3.822903363133248e-06,
"loss": 0.3069817733764648,
"step": 38000
},
{
"epoch": 12.164750957854405,
"learning_rate": 3.7803320561941254e-06,
"loss": 0.3044874954223633,
"step": 38100
},
{
"epoch": 12.196679438058748,
"learning_rate": 3.737760749255002e-06,
"loss": 0.3003558731079102,
"step": 38200
},
{
"epoch": 12.22860791826309,
"learning_rate": 3.6951894423158796e-06,
"loss": 0.2989999961853027,
"step": 38300
},
{
"epoch": 12.260536398467433,
"learning_rate": 3.6526181353767567e-06,
"loss": 0.30233255386352537,
"step": 38400
},
{
"epoch": 12.292464878671776,
"learning_rate": 3.6100468284376333e-06,
"loss": 0.30123531341552734,
"step": 38500
},
{
"epoch": 12.324393358876117,
"learning_rate": 3.56747552149851e-06,
"loss": 0.3095419502258301,
"step": 38600
},
{
"epoch": 12.35632183908046,
"learning_rate": 3.524904214559387e-06,
"loss": 0.32312957763671873,
"step": 38700
},
{
"epoch": 12.388250319284802,
"learning_rate": 3.4823329076202646e-06,
"loss": 0.31157236099243163,
"step": 38800
},
{
"epoch": 12.420178799489145,
"learning_rate": 3.4397616006811412e-06,
"loss": 0.3003998374938965,
"step": 38900
},
{
"epoch": 12.452107279693486,
"learning_rate": 3.3971902937420183e-06,
"loss": 0.3086379051208496,
"step": 39000
},
{
"epoch": 12.484035759897829,
"learning_rate": 3.354618986802895e-06,
"loss": 0.28147794723510744,
"step": 39100
},
{
"epoch": 12.515964240102171,
"learning_rate": 3.3120476798637717e-06,
"loss": 0.29010528564453125,
"step": 39200
},
{
"epoch": 12.547892720306514,
"learning_rate": 3.269476372924649e-06,
"loss": 0.30009984970092773,
"step": 39300
},
{
"epoch": 12.579821200510855,
"learning_rate": 3.2269050659855262e-06,
"loss": 0.3059814834594727,
"step": 39400
},
{
"epoch": 12.611749680715198,
"learning_rate": 3.184333759046403e-06,
"loss": 0.2934641456604004,
"step": 39500
},
{
"epoch": 12.64367816091954,
"learning_rate": 3.14176245210728e-06,
"loss": 0.30141252517700196,
"step": 39600
},
{
"epoch": 12.675606641123883,
"learning_rate": 3.0991911451681567e-06,
"loss": 0.3053057289123535,
"step": 39700
},
{
"epoch": 12.707535121328224,
"learning_rate": 3.056619838229034e-06,
"loss": 0.2935024261474609,
"step": 39800
},
{
"epoch": 12.739463601532567,
"learning_rate": 3.014048531289911e-06,
"loss": 0.2974138069152832,
"step": 39900
},
{
"epoch": 12.77139208173691,
"learning_rate": 2.971477224350788e-06,
"loss": 0.29385158538818357,
"step": 40000
},
{
"epoch": 12.803320561941252,
"learning_rate": 2.9289059174116646e-06,
"loss": 0.309177303314209,
"step": 40100
},
{
"epoch": 12.835249042145595,
"learning_rate": 2.8863346104725417e-06,
"loss": 0.30561391830444334,
"step": 40200
},
{
"epoch": 12.867177522349936,
"learning_rate": 2.8437633035334187e-06,
"loss": 0.30923063278198243,
"step": 40300
},
{
"epoch": 12.899106002554278,
"learning_rate": 2.801191996594296e-06,
"loss": 0.30836896896362304,
"step": 40400
},
{
"epoch": 12.931034482758621,
"learning_rate": 2.7586206896551725e-06,
"loss": 0.31578615188598635,
"step": 40500
},
{
"epoch": 12.962962962962964,
"learning_rate": 2.7160493827160496e-06,
"loss": 0.31291526794433594,
"step": 40600
},
{
"epoch": 12.994891443167305,
"learning_rate": 2.6734780757769262e-06,
"loss": 0.3064906311035156,
"step": 40700
},
{
"epoch": 13.0,
"eval_loss": 0.30278804898262024,
"step": 40716
},
{
"epoch": 13.026819923371647,
"learning_rate": 2.6309067688378037e-06,
"loss": 0.2991274642944336,
"step": 40800
},
{
"epoch": 13.05874840357599,
"learning_rate": 2.5883354618986804e-06,
"loss": 0.3081726837158203,
"step": 40900
},
{
"epoch": 13.090676883780333,
"learning_rate": 2.5457641549595575e-06,
"loss": 0.28322860717773435,
"step": 41000
},
{
"epoch": 13.122605363984674,
"learning_rate": 2.503192848020434e-06,
"loss": 0.3077671813964844,
"step": 41100
},
{
"epoch": 13.154533844189016,
"learning_rate": 2.4606215410813112e-06,
"loss": 0.3038086128234863,
"step": 41200
},
{
"epoch": 13.186462324393359,
"learning_rate": 2.4180502341421883e-06,
"loss": 0.3015581703186035,
"step": 41300
},
{
"epoch": 13.218390804597702,
"learning_rate": 2.3754789272030654e-06,
"loss": 0.30243860244750975,
"step": 41400
},
{
"epoch": 13.250319284802043,
"learning_rate": 2.332907620263942e-06,
"loss": 0.3104331398010254,
"step": 41500
},
{
"epoch": 13.282247765006385,
"learning_rate": 2.290336313324819e-06,
"loss": 0.30330204010009765,
"step": 41600
},
{
"epoch": 13.314176245210728,
"learning_rate": 2.2477650063856962e-06,
"loss": 0.2972829818725586,
"step": 41700
},
{
"epoch": 13.34610472541507,
"learning_rate": 2.205193699446573e-06,
"loss": 0.300672607421875,
"step": 41800
},
{
"epoch": 13.378033205619413,
"learning_rate": 2.1626223925074504e-06,
"loss": 0.30183706283569334,
"step": 41900
},
{
"epoch": 13.409961685823754,
"learning_rate": 2.120051085568327e-06,
"loss": 0.3096357536315918,
"step": 42000
},
{
"epoch": 13.441890166028097,
"learning_rate": 2.077479778629204e-06,
"loss": 0.3032659912109375,
"step": 42100
},
{
"epoch": 13.47381864623244,
"learning_rate": 2.0349084716900813e-06,
"loss": 0.2949547386169434,
"step": 42200
},
{
"epoch": 13.505747126436782,
"learning_rate": 1.992337164750958e-06,
"loss": 0.2889937973022461,
"step": 42300
},
{
"epoch": 13.537675606641123,
"learning_rate": 1.949765857811835e-06,
"loss": 0.30804216384887695,
"step": 42400
},
{
"epoch": 13.569604086845466,
"learning_rate": 1.9071945508727119e-06,
"loss": 0.30334890365600586,
"step": 42500
},
{
"epoch": 13.601532567049809,
"learning_rate": 1.864623243933589e-06,
"loss": 0.2941057586669922,
"step": 42600
},
{
"epoch": 13.633461047254151,
"learning_rate": 1.8220519369944658e-06,
"loss": 0.2950305938720703,
"step": 42700
},
{
"epoch": 13.665389527458492,
"learning_rate": 1.7794806300553427e-06,
"loss": 0.3061481285095215,
"step": 42800
},
{
"epoch": 13.697318007662835,
"learning_rate": 1.7369093231162198e-06,
"loss": 0.2870841217041016,
"step": 42900
},
{
"epoch": 13.729246487867178,
"learning_rate": 1.6943380161770967e-06,
"loss": 0.28978254318237306,
"step": 43000
},
{
"epoch": 13.76117496807152,
"learning_rate": 1.6517667092379737e-06,
"loss": 0.30341114044189454,
"step": 43100
},
{
"epoch": 13.793103448275861,
"learning_rate": 1.6091954022988506e-06,
"loss": 0.307810001373291,
"step": 43200
},
{
"epoch": 13.825031928480204,
"learning_rate": 1.5666240953597275e-06,
"loss": 0.30963399887084964,
"step": 43300
},
{
"epoch": 13.856960408684547,
"learning_rate": 1.5240527884206046e-06,
"loss": 0.3031142807006836,
"step": 43400
},
{
"epoch": 13.88888888888889,
"learning_rate": 1.4814814814814815e-06,
"loss": 0.3144196891784668,
"step": 43500
},
{
"epoch": 13.920817369093232,
"learning_rate": 1.4389101745423588e-06,
"loss": 0.28602792739868166,
"step": 43600
},
{
"epoch": 13.952745849297573,
"learning_rate": 1.3963388676032354e-06,
"loss": 0.2984015083312988,
"step": 43700
},
{
"epoch": 13.984674329501916,
"learning_rate": 1.3537675606641127e-06,
"loss": 0.29513154983520506,
"step": 43800
},
{
"epoch": 14.0,
"eval_loss": 0.3065377175807953,
"step": 43848
},
{
"epoch": 14.016602809706258,
"learning_rate": 1.3111962537249896e-06,
"loss": 0.28543767929077146,
"step": 43900
},
{
"epoch": 14.048531289910601,
"learning_rate": 1.2686249467858662e-06,
"loss": 0.2963153839111328,
"step": 44000
},
{
"epoch": 14.080459770114942,
"learning_rate": 1.2260536398467433e-06,
"loss": 0.3030729293823242,
"step": 44100
},
{
"epoch": 14.112388250319285,
"learning_rate": 1.1834823329076204e-06,
"loss": 0.2889740180969238,
"step": 44200
},
{
"epoch": 14.144316730523627,
"learning_rate": 1.1409110259684973e-06,
"loss": 0.2915242385864258,
"step": 44300
},
{
"epoch": 14.17624521072797,
"learning_rate": 1.0983397190293744e-06,
"loss": 0.28075174331665037,
"step": 44400
},
{
"epoch": 14.20817369093231,
"learning_rate": 1.0557684120902512e-06,
"loss": 0.3046562194824219,
"step": 44500
},
{
"epoch": 14.240102171136654,
"learning_rate": 1.0131971051511283e-06,
"loss": 0.28045236587524414,
"step": 44600
},
{
"epoch": 14.272030651340996,
"learning_rate": 9.706257982120052e-07,
"loss": 0.2948748970031738,
"step": 44700
},
{
"epoch": 14.303959131545339,
"learning_rate": 9.280544912728822e-07,
"loss": 0.2956666946411133,
"step": 44800
},
{
"epoch": 14.33588761174968,
"learning_rate": 8.854831843337592e-07,
"loss": 0.2919887924194336,
"step": 44900
},
{
"epoch": 14.367816091954023,
"learning_rate": 8.429118773946361e-07,
"loss": 0.29414264678955077,
"step": 45000
},
{
"epoch": 14.399744572158365,
"learning_rate": 8.003405704555131e-07,
"loss": 0.2997250938415527,
"step": 45100
},
{
"epoch": 14.431673052362708,
"learning_rate": 7.5776926351639e-07,
"loss": 0.2927609634399414,
"step": 45200
},
{
"epoch": 14.46360153256705,
"learning_rate": 7.15197956577267e-07,
"loss": 0.30317195892333987,
"step": 45300
},
{
"epoch": 14.495530012771392,
"learning_rate": 6.72626649638144e-07,
"loss": 0.29728498458862307,
"step": 45400
},
{
"epoch": 14.527458492975734,
"learning_rate": 6.300553426990209e-07,
"loss": 0.2906969451904297,
"step": 45500
},
{
"epoch": 14.559386973180077,
"learning_rate": 5.874840357598978e-07,
"loss": 0.3137422752380371,
"step": 45600
},
{
"epoch": 14.59131545338442,
"learning_rate": 5.449127288207749e-07,
"loss": 0.3111775016784668,
"step": 45700
},
{
"epoch": 14.62324393358876,
"learning_rate": 5.023414218816518e-07,
"loss": 0.30036439895629885,
"step": 45800
},
{
"epoch": 14.655172413793103,
"learning_rate": 4.5977011494252875e-07,
"loss": 0.2988995361328125,
"step": 45900
},
{
"epoch": 14.687100893997446,
"learning_rate": 4.171988080034058e-07,
"loss": 0.306041259765625,
"step": 46000
},
{
"epoch": 14.719029374201789,
"learning_rate": 3.7462750106428265e-07,
"loss": 0.30521770477294924,
"step": 46100
},
{
"epoch": 14.75095785440613,
"learning_rate": 3.320561941251597e-07,
"loss": 0.28645307540893555,
"step": 46200
},
{
"epoch": 14.782886334610472,
"learning_rate": 2.894848871860366e-07,
"loss": 0.29830142974853513,
"step": 46300
},
{
"epoch": 14.814814814814815,
"learning_rate": 2.469135802469136e-07,
"loss": 0.29040414810180665,
"step": 46400
},
{
"epoch": 14.846743295019158,
"learning_rate": 2.0434227330779057e-07,
"loss": 0.2852139472961426,
"step": 46500
},
{
"epoch": 14.878671775223498,
"learning_rate": 1.6177096636866753e-07,
"loss": 0.291912841796875,
"step": 46600
},
{
"epoch": 14.910600255427841,
"learning_rate": 1.1919965942954449e-07,
"loss": 0.2825672149658203,
"step": 46700
},
{
"epoch": 14.942528735632184,
"learning_rate": 7.662835249042146e-08,
"loss": 0.3074253273010254,
"step": 46800
}
],
"max_steps": 46980,
"num_train_epochs": 15,
"total_flos": 76612640706201600,
"trial_name": null,
"trial_params": null
}